diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp
index 790f11bb69c5..de7e4c341086 100644
--- a/clang-tools-extra/clang-doc/Mapper.cpp
+++ b/clang-tools-extra/clang-doc/Mapper.cpp
@@ -68,7 +68,7 @@ bool MapASTVisitor::VisitCXXMethodDecl(const CXXMethodDecl *D) {
 bool MapASTVisitor::VisitFunctionDecl(const FunctionDecl *D) {
   // Don't visit CXXMethodDecls twice
-  if (dyn_cast<CXXMethodDecl>(D))
+  if (isa<CXXMethodDecl>(D))
     return true;
   return mapDecl(D);
 }
diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp
index e132c56cb000..29762b6b54b1 100644
--- a/clang-tools-extra/clang-doc/Serialize.cpp
+++ b/clang-tools-extra/clang-doc/Serialize.cpp
@@ -382,7 +382,7 @@ populateParentNamespaces(llvm::SmallVector<Reference, 4> &Namespaces,
   // corresponds to a Record and if it doesn't have any namespace (because this
   // means it's in the global namespace). Also if its outermost namespace is a
   // record because that record matches the previous condition mentioned.
-  if ((Namespaces.empty() && dyn_cast<RecordDecl>(D)) ||
+  if ((Namespaces.empty() && isa<RecordDecl>(D)) ||
       (!Namespaces.empty() && Namespaces.back().RefType == InfoType::IT_record))
     Namespaces.emplace_back(SymbolID(), "GlobalNamespace", InfoType::IT_namespace);
@@ -419,10 +419,10 @@ static void populateFunctionInfo(FunctionInfo &I, const FunctionDecl *D,
   populateSymbolInfo(I, D, FC, LineNumber, Filename, IsFileInRootDir,
                      IsInAnonymousNamespace);
   if (const auto *T = getDeclForType(D->getReturnType())) {
-    if (dyn_cast<EnumDecl>(T))
+    if (isa<EnumDecl>(T))
       I.ReturnType = TypeInfo(getUSRForDecl(T), T->getNameAsString(),
                               InfoType::IT_enum, getInfoRelativePath(T));
-    else if (dyn_cast<RecordDecl>(T))
+    else if (isa<RecordDecl>(T))
       I.ReturnType = TypeInfo(getUSRForDecl(T), T->getNameAsString(),
                               InfoType::IT_record, getInfoRelativePath(T));
   } else {
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
index b09079b5e8ba..6c5054fdca28 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
@@ -193,7 +193,8 @@ DiagnosticBuilder ClangTidyContext::diag(const ClangTidyError &Error) {
       SM.getFileManager().getFile(Error.Message.FilePath);
   FileID ID = SM.getOrCreateFileID(*File, SrcMgr::C_User);
   SourceLocation FileStartLoc = SM.getLocForStartOfFile(ID);
-  SourceLocation Loc = FileStartLoc.getLocWithOffset(Error.Message.FileOffset);
+  SourceLocation Loc = FileStartLoc.getLocWithOffset(
+      static_cast(Error.Message.FileOffset));
   return diag(Error.DiagnosticName, Loc, Error.Message.Message,
               static_cast<DiagnosticIDs::Level>(Error.DiagLevel));
 }
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
index 23800f5622f6..69f1ceddd9bd 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
@@ -241,14 +241,12 @@ getFixIt(const tooling::Diagnostic &Diagnostic, bool AnyFix);
 /// A diagnostic consumer that turns each \c Diagnostic into a
 /// \c SourceManager-independent \c ClangTidyError.
-///
-/// \param EnableNolintBlocks Enables diagnostic-disabling inside blocks of
-/// code, delimited by NOLINTBEGIN and NOLINTEND.
-//
 // FIXME: If we move away from unit-tests, this can be moved to a private
 // implementation file.
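A note on the pattern in the Mapper.cpp and Serialize.cpp hunks above: when the result of the cast is never used, `isa<>` states the intent (a pure type test), while `dyn_cast<>` is for cases where the narrowed pointer is actually needed. A minimal sketch of the distinction, not part of the patch itself; the helper names are made up for illustration:

.. code-block:: c++

  #include "clang/AST/DeclCXX.h"     // clang::CXXMethodDecl, clang::FunctionDecl
  #include "llvm/Support/Casting.h"  // llvm::isa, llvm::dyn_cast

  // Only a yes/no answer is needed, so isa<> is the right tool.
  static bool isMethod(const clang::FunctionDecl *D) {
    return llvm::isa<clang::CXXMethodDecl>(D);
  }

  // Here the narrowed pointer is used, so dyn_cast<> earns its keep.
  static unsigned numParamsIfMethod(const clang::FunctionDecl *D) {
    if (const auto *MD = llvm::dyn_cast<clang::CXXMethodDecl>(D))
      return MD->getNumParams();
    return 0;
  }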
class ClangTidyDiagnosticConsumer : public DiagnosticConsumer { public: + /// \param EnableNolintBlocks Enables diagnostic-disabling inside blocks of + /// code, delimited by NOLINTBEGIN and NOLINTEND. ClangTidyDiagnosticConsumer(ClangTidyContext &Ctx, DiagnosticsEngine *ExternalDiagEngine = nullptr, bool RemoveIncompatibleErrors = true, diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp index aa839beddac6..c9f3a7db0346 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp @@ -221,7 +221,6 @@ void DurationFactoryScaleCheck::check(const MatchFinder::MatchResult &Result) { tooling::fixit::getText(*Remainder, *Result.Context) + ")") .str()); } - return; } } // namespace abseil diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp index 5741c0d505d5..e834c8a12459 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp @@ -40,7 +40,7 @@ void StringFindStartswithCheck::registerMatchers(MatchFinder *Finder) { auto StringFind = cxxMemberCallExpr( // .find()-call on a string... - callee(cxxMethodDecl(hasName("find"))), + callee(cxxMethodDecl(hasName("find")).bind("findfun")), on(hasType(StringType)), // ... with some search expression ... hasArgument(0, expr().bind("needle")), @@ -55,6 +55,25 @@ void StringFindStartswithCheck::registerMatchers(MatchFinder *Finder) { ignoringParenImpCasts(StringFind.bind("findexpr")))) .bind("expr"), this); + + auto StringRFind = cxxMemberCallExpr( + // .rfind()-call on a string... + callee(cxxMethodDecl(hasName("rfind")).bind("findfun")), + on(hasType(StringType)), + // ... with some search expression ... + hasArgument(0, expr().bind("needle")), + // ... and "0" as second argument. + hasArgument(1, ZeroLiteral)); + + Finder->addMatcher( + // Match [=!]= with either a zero or npos on one side and a string.rfind + // on the other. + binaryOperator( + hasAnyOperatorName("==", "!="), + hasOperands(ignoringParenImpCasts(ZeroLiteral), + ignoringParenImpCasts(StringRFind.bind("findexpr")))) + .bind("expr"), + this); } void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) { @@ -69,6 +88,11 @@ void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) { const Expr *Haystack = Result.Nodes.getNodeAs("findexpr") ->getImplicitObjectArgument(); assert(Haystack != nullptr); + const CXXMethodDecl *FindFun = + Result.Nodes.getNodeAs("findfun"); + assert(FindFun != nullptr); + + bool Rev = FindFun->getName().contains("rfind"); if (ComparisonExpr->getBeginLoc().isMacroID()) return; @@ -86,10 +110,11 @@ void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) { bool Neg = ComparisonExpr->getOpcode() == BO_NE; // Create the warning message and a FixIt hint replacing the original expr. 
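For readers unfamiliar with the idiom the new `StringRFind` matcher targets: `haystack.rfind(needle, 0)` can only match at position 0, so comparing its result with 0 is a prefix test that never scans past the front of the string, whereas `find(needle) == 0` may walk the whole string before failing. The check now rewrites both spellings to `absl::StartsWith`, and the `Rev` flag captured above only selects which spelling the warning text names. A small before/after sketch, assuming Abseil's `absl/strings/match.h`:

.. code-block:: c++

  #include <string>

  #include "absl/strings/match.h"  // absl::StartsWith

  // Both spellings below are what the check flags as prefix tests.
  bool hasPrefixOld(const std::string &S) {
    return S.find("Hello") == 0 || S.rfind("Hello", 0) == 0;
  }

  // The replacement the FixIt produces.
  bool hasPrefixNew(const std::string &S) {
    return absl::StartsWith(S, "Hello");
  }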
- auto Diagnostic = diag(ComparisonExpr->getBeginLoc(), - "use %select{absl::StartsWith|!absl::StartsWith}0 " - "instead of find() %select{==|!=}0 0") - << Neg; + auto Diagnostic = + diag(ComparisonExpr->getBeginLoc(), + "use %select{absl::StartsWith|!absl::StartsWith}0 " + "instead of %select{find()|rfind()}1 %select{==|!=}0 0") + << Neg << Rev; Diagnostic << FixItHint::CreateReplacement( ComparisonExpr->getSourceRange(), diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp index 64c8797934d2..d373877713f1 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp @@ -87,7 +87,7 @@ void CloexecCheck::insertStringFlag( // Check if the may be in the mode string. const auto *ModeStr = dyn_cast(ModeArg->IgnoreParenCasts()); - if (!ModeStr || (ModeStr->getString().find(Mode) != StringRef::npos)) + if (!ModeStr || ModeStr->getString().contains(Mode)) return; std::string ReplacementText = buildFixMsgForStringFlag( diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp index 8da046955425..4bf841648f94 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp @@ -64,7 +64,7 @@ static std::string collapseConsecutive(StringRef Str, char C) { static bool hasReservedDoubleUnderscore(StringRef Name, const LangOptions &LangOpts) { if (LangOpts.CPlusPlus) - return Name.find("__") != StringRef::npos; + return Name.contains("__"); return Name.startswith("__"); } diff --git a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp index 42b697076b35..842bf43b2c45 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp @@ -41,7 +41,6 @@ void ImplementationInNamespaceCheck::check( diag(MatchedDecl->getLocation(), "declaration must be declared within the '%0' namespace") << RequiredNamespace; - return; } } // namespace llvm_libc diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 432d929057d1..b0b882be1a6c 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -829,7 +829,7 @@ bool LoopConvertCheck::isConvertible(ASTContext *Context, } else if (FixerKind == LFK_PseudoArray) { // This call is required to obtain the container. 
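Several of the clang-tidy hunks in this patch (CloexecCheck, ReservedIdentifierCheck, and the RawStringLiteral, BracesAroundStatements and NamedParameter changes that follow) are the same mechanical cleanup: `llvm::StringRef::contains` already expresses the `find(...) != npos` test. A minimal sketch of the equivalence:

.. code-block:: c++

  #include "llvm/ADT/StringRef.h"

  // Equivalent predicates; the patch prefers the second form.
  bool hasDoubleUnderscoreOld(llvm::StringRef Name) {
    return Name.find("__") != llvm::StringRef::npos;
  }

  bool hasDoubleUnderscoreNew(llvm::StringRef Name) {
    return Name.contains("__");
  }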
const auto *EndCall = Nodes.getNodeAs(EndCallName); - if (!EndCall || !dyn_cast(EndCall->getCallee())) + if (!EndCall || !isa(EndCall->getCallee())) return false; } return true; diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp index 26b1d8ecdc31..40dda98b1e49 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -25,7 +25,7 @@ bool containsEscapes(StringRef HayStack, StringRef Escapes) { return false; while (BackSlash != StringRef::npos) { - if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos) + if (!Escapes.contains(HayStack[BackSlash + 1])) return false; BackSlash = HayStack.find('\\', BackSlash + 2); } diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp index 7dc519c15282..07e962a07e84 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp @@ -81,7 +81,7 @@ static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM, SourceRange TokRange(Loc, TokEndLoc); StringRef Comment = Lexer::getSourceText( CharSourceRange::getTokenRange(TokRange), SM, Context->getLangOpts()); - if (Comment.startswith("/*") && Comment.find('\n') != StringRef::npos) { + if (Comment.startswith("/*") && Comment.contains('\n')) { // Multi-line block comment, insert brace before. break; } diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp index 4f81dc49ded7..c8a8edf67884 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp @@ -71,7 +71,7 @@ void NamedParameterCheck::check(const MatchFinder::MatchResult &Result) { const char *Begin = SM.getCharacterData(Parm->getBeginLoc()); const char *End = SM.getCharacterData(Parm->getLocation()); StringRef Data(Begin, End - Begin); - if (Data.find("/*") != StringRef::npos) + if (Data.contains("/*")) continue; UnnamedParams.push_back(std::make_pair(Function, I)); diff --git a/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst b/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst index 43f35ac66c8a..8224c37a087b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst @@ -3,14 +3,15 @@ abseil-string-find-startswith ============================= -Checks whether a ``std::string::find()`` result is compared with 0, and -suggests replacing with ``absl::StartsWith()``. This is both a readability and -performance issue. +Checks whether a ``std::string::find()`` or ``std::string::rfind()`` result is +compared with 0, and suggests replacing with ``absl::StartsWith()``. This is +both a readability and performance issue. .. 
code-block:: c++ string s = "..."; if (s.find("Hello World") == 0) { /* do something */ } + if (s.rfind("Hello World", 0) == 0) { /* do something */ } becomes @@ -19,6 +20,7 @@ becomes string s = "..."; if (absl::StartsWith(s, "Hello World")) { /* do something */ } + if (absl::StartsWith(s, "Hello World")) { /* do something */ } Options diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp b/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp index 89365bfa1f81..7a1fd8233d83 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp @@ -1,6 +1,8 @@ // RUN: %check_clang_tidy %s abseil-string-find-startswith %t -- \ // RUN: -config="{CheckOptions: [{key: 'abseil-string-find-startswith.StringLikeClasses', value: '::std::basic_string;::basic_string'}]}" +using size_t = decltype(sizeof(int)); + namespace std { template class allocator {}; template class char_traits {}; @@ -13,12 +15,17 @@ struct basic_string { ~basic_string(); int find(basic_string s, int pos = 0); int find(const char *s, int pos = 0); + int rfind(basic_string s, int pos = npos); + int rfind(const char *s, int pos = npos); + static constexpr size_t npos = -1; }; typedef basic_string string; typedef basic_string wstring; struct cxx_string { int find(const char *s, int pos = 0); + int rfind(const char *s, int pos = npos); + static constexpr size_t npos = -1; }; } // namespace std @@ -61,9 +68,42 @@ void tests(std::string s, global_string s2) { // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s2, "a");{{$}} + s.rfind("a", 0) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith instead of rfind() == 0 [abseil-string-find-startswith] + // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s, "a");{{$}} + + s.rfind(s, 0) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s, s);{{$}} + + s.rfind("aaa", 0) != 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use !absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}!absl::StartsWith(s, "aaa");{{$}} + + s.rfind(foo(foo(bar())), 0) != 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use !absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}!absl::StartsWith(s, foo(foo(bar())));{{$}} + + if (s.rfind("....", 0) == 0) { /* do something */ } + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}if (absl::StartsWith(s, "....")) { /* do something */ }{{$}} + + 0 != s.rfind("a", 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use !absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}!absl::StartsWith(s, "a");{{$}} + + s2.rfind("a", 0) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s2, "a");{{$}} + // expressions that don't trigger the check are here. A_MACRO(s.find("a"), 0); + A_MACRO(s.rfind("a", 0), 0); s.find("a", 1) == 0; s.find("a", 1) == 1; s.find("a") == 1; + s.rfind("a", 1) == 0; + s.rfind("a", 1) == 1; + s.rfind("a") == 0; + s.rfind("a") == 1; } diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index ce66ef58fca6..26da5a0ff255 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3093,6 +3093,15 @@ There is a set of concrete HW architectures that OpenCL can be compiled for. 
Generic Targets ^^^^^^^^^^^^^^^ +- A SPIR-V binary can be produced for 32 or 64 bit targets. + + .. code-block:: console + + $ clang -target spirv32 test.cl + $ clang -target spirv64 test.cl + + More details can be found in :ref:`the SPIR-V support section `. + - SPIR is available as a generic target to allow portable bitcode to be produced that can be used across GPU toolchains. The implementation follows `the SPIR specification `_. There are two flavors @@ -3510,6 +3519,51 @@ Clang expects the GCC executable "gcc.exe" compiled for `Some tests might fail `_ on ``x86_64-w64-mingw32``. +.. _spir-v: + +SPIR-V support +-------------- + +Clang supports generation of SPIR-V conformant to `the OpenCL Environment +Specification +`_. + +To generate SPIR-V binaries, Clang uses the external ``llvm-spirv`` tool from the +`SPIRV-LLVM-Translator repo +`_. + +Prior to the generation of SPIR-V binary with Clang, ``llvm-spirv`` +should be built or installed. Please refer to `the following instructions +`_ +for more details. Clang will expects the ``llvm-spirv`` executable to +be present in the ``PATH`` environment variable. Clang uses ``llvm-spirv`` +with `the widely adopted assembly syntax package +`_. + +`The versioning +`_ of +``llvm-spirv`` is aligned with Clang major releases. The same applies to the +main development branch. It is therefore important to ensure the ``llvm-spirv`` +version is in alignment with the Clang version. For troubleshooting purposes +``llvm-spirv`` can be `tested in isolation +`_. + +Example usage for OpenCL kernel compilation: + + .. code-block:: console + + $ clang -target spirv32 test.cl + $ clang -target spirv64 test.cl + +Both invocations of Clang will result in the generation of a SPIR-V binary file +`test.o` for 32 bit and 64 bit respectively. This file can be imported +by an OpenCL driver that support SPIR-V consumption or it can be compiled +further by offline SPIR-V consumer tools. + +Converting to SPIR-V produced with the optimization levels other than `-O0` is +currently available as an experimental feature and it is not guaranteed to work +in all cases. + .. _clang-cl: clang-cl diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index d9bf2f07291f..0d97e9ad8623 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -240,7 +240,7 @@ class ObjCContextInfo : public CommonTypeInfo { } void setSwiftImportAsNonGeneric(llvm::Optional Value) { SwiftImportAsNonGenericSpecified = Value.hasValue(); - SwiftImportAsNonGeneric = Value.hasValue() ? *Value : false; + SwiftImportAsNonGeneric = Value.getValueOr(false); } llvm::Optional getSwiftObjCMembers() const { @@ -249,7 +249,7 @@ class ObjCContextInfo : public CommonTypeInfo { } void setSwiftObjCMembers(llvm::Optional Value) { SwiftObjCMembersSpecified = Value.hasValue(); - SwiftObjCMembers = Value.hasValue() ? *Value : false; + SwiftObjCMembers = Value.getValueOr(false); } /// Strip off any information within the class information structure that is @@ -368,7 +368,7 @@ class ObjCPropertyInfo : public VariableInfo { } void setSwiftImportAsAccessors(llvm::Optional Value) { SwiftImportAsAccessorsSpecified = Value.hasValue(); - SwiftImportAsAccessors = Value.hasValue() ? 
*Value : false; + SwiftImportAsAccessors = Value.getValueOr(false); } friend bool operator==(const ObjCPropertyInfo &, const ObjCPropertyInfo &); @@ -433,7 +433,7 @@ class ParamInfo : public VariableInfo { } void setNoEscape(llvm::Optional Value) { NoEscapeSpecified = Value.hasValue(); - NoEscape = Value.hasValue() ? *Value : false; + NoEscape = Value.getValueOr(false); } llvm::Optional getRetainCountConvention() const { @@ -671,7 +671,7 @@ class TagInfo : public CommonTypeInfo { } void setFlagEnum(llvm::Optional Value) { HasFlagEnum = Value.hasValue(); - IsFlagEnum = Value.hasValue() ? *Value : false; + IsFlagEnum = Value.getValueOr(false); } TagInfo &operator|=(const TagInfo &RHS) { diff --git a/clang/include/clang/AST/AbstractBasicReader.h b/clang/include/clang/AST/AbstractBasicReader.h index 5505d661b44e..442039044cfe 100644 --- a/clang/include/clang/AST/AbstractBasicReader.h +++ b/clang/include/clang/AST/AbstractBasicReader.h @@ -21,7 +21,7 @@ inline T makeNullableFromOptional(const Optional &value) { template inline T *makePointerFromOptional(Optional value) { - return (value ? *value : nullptr); + return value.getValueOr(nullptr); } // PropertyReader is a class concept that requires the following method: diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index d33babef958e..f7a2e3146d06 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1211,13 +1211,12 @@ class TemplateTypeParmDecl final : public TypeDecl, DefArgStorage DefaultArgument; TemplateTypeParmDecl(DeclContext *DC, SourceLocation KeyLoc, - SourceLocation IdLoc, IdentifierInfo *Id, - bool Typename, bool HasTypeConstraint, - Optional NumExpanded) + SourceLocation IdLoc, IdentifierInfo *Id, bool Typename, + bool HasTypeConstraint, Optional NumExpanded) : TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename), - HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false), - ExpandedParameterPack(NumExpanded), - NumExpanded(NumExpanded ? *NumExpanded : 0) {} + HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false), + ExpandedParameterPack(NumExpanded), + NumExpanded(NumExpanded.getValueOr(0)) {} public: static TemplateTypeParmDecl *Create(const ASTContext &C, DeclContext *DC, diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 565eb0c9cf99..3fd1b6d30080 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2224,6 +2224,47 @@ class OMPCaptureClause : public OMPClause { } }; +/// This represents 'compare' clause in the '#pragma omp atomic' +/// directive. +/// +/// \code +/// #pragma omp atomic compare +/// \endcode +/// In this example directive '#pragma omp atomic' has 'compare' clause. +class OMPCompareClause final : public OMPClause { +public: + /// Build 'compare' clause. + /// + /// \param StartLoc Starting location of the clause. + /// \param EndLoc Ending location of the clause. + OMPCompareClause(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_compare, StartLoc, EndLoc) {} + + /// Build an empty clause. 
+ OMPCompareClause() + : OMPClause(llvm::omp::OMPC_compare, SourceLocation(), SourceLocation()) { + } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_compare; + } +}; + /// This represents 'seq_cst' clause in the '#pragma omp atomic' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 8bcee8790e7b..f62dc36de556 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3234,6 +3234,11 @@ bool RecursiveASTVisitor::VisitOMPCaptureClause(OMPCaptureClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPCompareClause(OMPCompareClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPSeqCstClause(OMPSeqCstClause *) { return true; diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index 4a58fe870944..a0ae44131b45 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -155,7 +155,7 @@ class CFGWalker { return false; // Ignore anonymous functions. - if (!dyn_cast_or_null(AC.getDecl())) + if (!isa_and_nonnull(AC.getDecl())) return false; SortedGraph = AC.getAnalysis(); diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index f57635e010dc..45d445163749 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -649,6 +649,7 @@ BUILTIN(__builtin_elementwise_min, "v.", "nct") BUILTIN(__builtin_elementwise_ceil, "v.", "nct") BUILTIN(__builtin_reduce_max, "v.", "nct") BUILTIN(__builtin_reduce_min, "v.", "nct") +BUILTIN(__builtin_reduce_xor, "v.", "nct") BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt") diff --git a/clang/include/clang/Basic/BuiltinsHexagon.def b/clang/include/clang/Basic/BuiltinsHexagon.def index 0001bd556117..0f62c235bb62 100644 --- a/clang/include/clang/Basic/BuiltinsHexagon.def +++ b/clang/include/clang/Basic/BuiltinsHexagon.def @@ -17,8 +17,10 @@ # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif +#pragma push_macro("V69") +#define V69 "v69" #pragma push_macro("V68") -#define V68 "v68" +#define V68 "v68|" V69 #pragma push_macro("V67") #define V67 "v67|" V68 #pragma push_macro("V66") @@ -34,8 +36,10 @@ #pragma push_macro("V5") #define V5 "v5|" V55 +#pragma push_macro("HVXV69") +#define HVXV69 "hvxv69" #pragma push_macro("HVXV68") -#define HVXV68 "hvxv68" +#define HVXV68 "hvxv68|" HVXV69 #pragma push_macro("HVXV67") #define HVXV67 "hvxv67|" HVXV68 #pragma push_macro("HVXV66") @@ -128,6 +132,7 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_acc_128B,"V64iV64iV32iLLi","", " #pragma pop_macro("HVXV66") #pragma pop_macro("HVXV67") #pragma pop_macro("HVXV68") +#pragma pop_macro("HVXV69") #pragma pop_macro("V5") #pragma pop_macro("V55") @@ -137,6 +142,7 @@ 
TARGET_BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_acc_128B,"V64iV64iV32iLLi","", " #pragma pop_macro("V66") #pragma pop_macro("V67") #pragma pop_macro("V68") +#pragma pop_macro("V69") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsHexagonDep.def b/clang/include/clang/Basic/BuiltinsHexagonDep.def index 152c9c4dd8ad..2eb4ca69c7bd 100644 --- a/clang/include/clang/Basic/BuiltinsHexagonDep.def +++ b/clang/include/clang/Basic/BuiltinsHexagonDep.def @@ -1739,3 +1739,150 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10, "V32iV32iV32iUIi", "", HVXV68) TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10_128B, "V64iV64iV64iUIi", "", HVXV68) TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10_vxx, "V32iV32iV32iV32iUIi", "", HVXV68) TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10_vxx_128B, "V64iV64iV64iV64iUIi", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_sf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_sf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vassign_fp, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vassign_fp_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf16, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf16_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf32, "V16iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf32_128B, "V32iV64i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_qf32, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_qf32_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_b_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_b_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_h_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_h_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_b, 
"V32iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_b_128B, "V64iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_h, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_h_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_ub, "V32iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_ub_128B, "V64iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_uh, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_uh_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_sf_hf, "V32iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_sf_hf_128B, "V64iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_ub_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_ub_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_uh_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_uh_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc, "V16iV16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc_128B, "V32iV32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_sf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_sf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf, "V64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_128B, "V128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_and, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_and_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_or, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_or_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_xor, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_xor_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf, "V64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_128B, "V128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_and, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_and_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_or, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_or_128B, "V128bV128bV32iV32i", "", HVXV68) 
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_xor, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_xor_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf_acc, "V16iV16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf_acc_128B, "V32iV32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_qf16, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_qf16_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf_acc, "V32iV32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf_acc_128B, "V64iV64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16_mix_128B, "V32iV32iV32i", "", HVXV68) 
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_sf_128B, "V32iV32iV32i", "", HVXV68) + +// V69 HVX Instructions. + +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubrndsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubrndsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhrndsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhrndsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs, "V16iV16iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs_128B, "V32iV32iV32i", "", HVXV69) diff --git a/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def index 93f560fc5adc..389eadb21d01 100644 --- a/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def +++ b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def @@ -8,199 +8,7 @@ // Automatically generated file, do not edit! 
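The BuiltinsHexagon.def changes above chain each architecture string into the next one ("v68|v69", "hvxv68|hvxv69"), so builtins guarded on V68/HVXV68 stay available when compiling for the new v69 CPU, while the v69-only builtins at the end require HVXV69. A hedged sketch of how the new target is selected from the driver, using the `-mv69` alias and HVX flags added in the Options.td hunk further down; this assumes a clang built with the Hexagon backend, and `kernel.c` is just a placeholder:

.. code-block:: console

  $ clang --target=hexagon-unknown-elf -mv69 -mhvx -O2 -c kernel.c
  $ clang --target=hexagon-unknown-elf -mcpu=hexagonv69 -mhvx -O2 -c kernel.c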
//===----------------------------------------------------------------------===// -CUSTOM_BUILTIN_MAPPING(A2_add, 0) -CUSTOM_BUILTIN_MAPPING(A2_addi, 0) -CUSTOM_BUILTIN_MAPPING(A2_addp, 0) -CUSTOM_BUILTIN_MAPPING(A2_and, 0) -CUSTOM_BUILTIN_MAPPING(A2_andir, 0) -CUSTOM_BUILTIN_MAPPING(A2_neg, 0) -CUSTOM_BUILTIN_MAPPING(A2_not, 0) -CUSTOM_BUILTIN_MAPPING(A2_or, 0) -CUSTOM_BUILTIN_MAPPING(A2_orir, 0) -CUSTOM_BUILTIN_MAPPING(A2_sub, 0) -CUSTOM_BUILTIN_MAPPING(A2_subp, 0) -CUSTOM_BUILTIN_MAPPING(A2_subri, 0) -CUSTOM_BUILTIN_MAPPING(A2_sxtb, 0) -CUSTOM_BUILTIN_MAPPING(A2_sxth, 0) -CUSTOM_BUILTIN_MAPPING(A2_xor, 0) -CUSTOM_BUILTIN_MAPPING(A2_zxtb, 0) -CUSTOM_BUILTIN_MAPPING(A2_zxth, 0) -CUSTOM_BUILTIN_MAPPING(M2_dpmpyss_s0, 0) -CUSTOM_BUILTIN_MAPPING(M2_dpmpyuu_s0, 0) -CUSTOM_BUILTIN_MAPPING(M2_mpyi, 0) -CUSTOM_BUILTIN_MAPPING(M2_mpysmi, 0) -CUSTOM_BUILTIN_MAPPING(M2_mpyui, 0) -CUSTOM_BUILTIN_MAPPING(S2_asl_i_p, 0) -CUSTOM_BUILTIN_MAPPING(S2_asl_i_r, 0) -CUSTOM_BUILTIN_MAPPING(S2_asr_i_p, 0) -CUSTOM_BUILTIN_MAPPING(S2_asr_i_r, 0) -CUSTOM_BUILTIN_MAPPING(S2_lsr_i_p, 0) -CUSTOM_BUILTIN_MAPPING(S2_lsr_i_r, 0) -CUSTOM_BUILTIN_MAPPING(V6_pred_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_and_n, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_and_n_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_not, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_not_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_or_n, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_or_n_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddbnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddbnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddbq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddbq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddhnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddhnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddwnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddwnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_and_128B, 128) 
-CUSTOM_BUILTIN_MAPPING(V6_veqh_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vmux, 64) -CUSTOM_BUILTIN_MAPPING(V6_vmux_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubbnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubbnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubbq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubbq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubhnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubhnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubwnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubwnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vswap, 64) -CUSTOM_BUILTIN_MAPPING(V6_vswap_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqh, 64) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqw, 64) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqw_128B, 128) 
CUSTOM_BUILTIN_MAPPING(V6_vaddcarry, 64) CUSTOM_BUILTIN_MAPPING(V6_vaddcarry_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvnqv, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvnqv_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvqv, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvqv_128B, 128) CUSTOM_BUILTIN_MAPPING(V6_vsubcarry, 64) CUSTOM_BUILTIN_MAPPING(V6_vsubcarry_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgathermwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgathermwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqb, 64) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqb_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqh, 64) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqw, 64) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vscattermwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vscattermwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat_128B, 128) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 94b3003a9c33..723302f108e2 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -368,9 +368,6 @@ ENUM_CODEGENOPT(DebuggerTuning, llvm::DebuggerKind, 3, /// emitted. VALUE_CODEGENOPT(DwarfVersion, 3, 0) -/// Whether to use experimental new variable location tracking. -CODEGENOPT(ValueTrackingVariableLocations, 1, 0) - /// Whether we should emit CodeView debug information. It's possible to emit /// CodeView and DWARF into the same object. CODEGENOPT(EmitCodeView, 1, 0) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index d4781b647b87..33ec03a17136 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -398,7 +398,7 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Executable and command-line used to create a given CompilerInvocation. /// Most of the time this will be the full -cc1 command. const char *Argv0 = nullptr; - ArrayRef CommandLineArgs; + std::vector CommandLineArgs; /// The minimum hotness value a diagnostic needs in order to be included in /// optimization diagnostics. 
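On the `CodeGenOptions::CommandLineArgs` change a few hunks above: `ArrayRef` is a non-owning view, so storing one in a long-lived options object ties its validity to whatever buffer the caller happened to pass, while `std::vector<std::string>` gives the options their own copy. A simplified sketch of the hazard, with illustrative types only rather than the actual clang code:

.. code-block:: c++

  #include <string>
  #include <vector>

  #include "llvm/ADT/ArrayRef.h"

  struct OptionsView  { llvm::ArrayRef<const char *> Args; }; // non-owning view
  struct OptionsOwned { std::vector<std::string> Args; };     // owns its copies

  OptionsView makeView() {
    const char *Local[] = {"-O2", "-g"};
    return {Local}; // BUG (for illustration): the view outlives Local
  }

  OptionsOwned makeOwned() {
    const char *Local[] = {"-O2", "-g"};
    OptionsOwned O;
    O.Args.assign(Local, Local + 2); // safe: each argument is copied
    return O;
  }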
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 2aeb987725e1..a7fd2f26478c 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -116,6 +116,10 @@ def warn_drv_unsupported_option_for_target : Warning< "ignoring '%0' option as it is not currently supported for target '%1'">, InGroup; +def warn_drv_spirv_linking_multiple_inputs_unsupported: Warning< + "Linking multiple input files is not supported for SPIR-V yet">, + InGroup; + def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< @@ -439,11 +443,13 @@ def err_analyzer_checker_option_invalid_input : Error< def err_analyzer_checker_incompatible_analyzer_option : Error< "checker cannot be enabled with analyzer option '%0' == %1">; -def err_drv_invalid_hvx_length : Error< - "-mhvx-length is not supported without a -mhvx/-mhvx= flag">; -def warn_drv_vectorize_needs_hvx : Warning< - "auto-vectorization requires HVX, use -mhvx to enable it">, +def warn_drv_needs_hvx : Warning< + "%0 requires HVX, use -mhvx/-mhvx= to enable it">, InGroup; +def err_drv_needs_hvx : Error< + "%0 requires HVX, use -mhvx/-mhvx= to enable it">; +def err_drv_needs_hvx_version : Error< + "%0 is not supported on HVX %1">; def err_drv_module_header_wrong_kind : Error< "header file '%0' input type '%1' does not match type of prior input " @@ -559,7 +565,7 @@ def warn_drv_moutline_unsupported_opt : Warning< InGroup; def warn_drv_moutline_atomics_unsupported_opt : Warning< - "'%0' does not support '-moutline-atomics'; flag ignored">, + "'%0' does not support '-%1'; flag ignored">, InGroup; def warn_drv_darwin_sdk_invalid_settings : Warning< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 875e639cb2be..f2089bfda04d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10509,7 +10509,7 @@ def err_omp_atomic_capture_not_compound_statement : Error< def note_omp_atomic_capture: Note< "%select{expected assignment expression|expected compound statement|expected exactly two expression statements|expected in right hand side of the first expression}0">; def err_omp_atomic_several_clauses : Error< - "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause">; + "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< "directive '#pragma omp %0' cannot contain more than one %select{'seq_cst', 'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">; def err_omp_atomic_incompatible_mem_order_clause : Error< @@ -11071,8 +11071,6 @@ def err_coroutine_type_missing_specialization : Error< "specialization %0">; def err_coroutine_promise_incompatible_return_functions : Error< "the coroutine promise type %0 declares both 'return_value' and 'return_void'">; -def err_coroutine_promise_requires_return_function : Error< - "the coroutine promise type %0 must declare either 'return_value' or 'return_void'">; def note_coroutine_promise_implicit_await_transform_required_here : Note< "call to 'await_transform' implicitly required by 'co_await' here">; def note_coroutine_promise_suspend_implicitly_required : Note< @@ -11378,7 +11376,8 @@ def err_builtin_invalid_arg_type: Error < 
"%select{vector, integer or floating point type|matrix|" "pointer to a valid matrix element type|" "signed integer or floating point type|vector type|" - "floating point type}1 (was %2)">; + "floating point type|" + "vector of integers}1 (was %2)">; def err_builtin_matrix_disabled: Error< "matrix types extension is disabled. Pass -fenable-matrix to enable it">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 08e9e1a3432a..dc8bd831f2a2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4160,6 +4160,8 @@ def mv67t : Flag<["-"], "mv67t">, Group, Alias, AliasArgs<["hexagonv67t"]>; def mv68 : Flag<["-"], "mv68">, Group, Alias, AliasArgs<["hexagonv68"]>; +def mv69 : Flag<["-"], "mv69">, Group, + Alias, AliasArgs<["hexagonv69"]>; def mhexagon_hvx : Flag<["-"], "mhvx">, Group, HelpText<"Enable Hexagon Vector eXtensions">; def mhexagon_hvx_EQ : Joined<["-"], "mhvx=">, @@ -4171,6 +4173,18 @@ def mno_hexagon_hvx : Flag<["-"], "mno-hvx">, def mhexagon_hvx_length_EQ : Joined<["-"], "mhvx-length=">, Group, HelpText<"Set Hexagon Vector Length">, Values<"64B,128B">; +def mhexagon_hvx_qfloat : Flag<["-"], "mhvx-qfloat">, + Group, + HelpText<"Enable Hexagon HVX QFloat instructions">; +def mno_hexagon_hvx_qfloat : Flag<["-"], "mno-hvx-qfloat">, + Group, + HelpText<"Disable Hexagon HVX QFloat instructions">; +def mhexagon_hvx_ieee_fp : Flag<["-"], "mhvx-ieee-fp">, + Group, + HelpText<"Enable Hexagon HVX IEEE floating-point">; +def mno_hexagon_hvx_ieee_fp : Flag<["-"], "mno-hvx-ieee-fp">, + Group, + HelpText<"Disable Hexagon HVX IEEE floating-point">; def ffixed_r19: Flag<["-"], "ffixed-r19">, HelpText<"Reserve register r19 (Hexagon only)">; def mmemops : Flag<["-"], "mmemops">, Group, diff --git a/clang/include/clang/Driver/Tool.h b/clang/include/clang/Driver/Tool.h index cc0a09fb2747..42cf99a4a970 100644 --- a/clang/include/clang/Driver/Tool.h +++ b/clang/include/clang/Driver/Tool.h @@ -52,6 +52,7 @@ class Tool { const ToolChain &getToolChain() const { return TheToolChain; } virtual bool hasIntegratedAssembler() const { return false; } + virtual bool hasIntegratedBackend() const { return true; } virtual bool canEmitIR() const { return false; } virtual bool hasIntegratedCPP() const = 0; virtual bool isLinkJob() const { return false; } diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index e7b13eec3fc1..4afc9bf36b5f 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -384,6 +384,9 @@ class ToolChain { /// Check if the toolchain should use the integrated assembler. virtual bool useIntegratedAs() const; + /// Check if the toolchain should use the integrated backend. + virtual bool useIntegratedBackend() const { return true; } + /// Check if the toolchain should use AsmParser to parse inlineAsm when /// integrated assembler is not default. virtual bool parseInlineAsmUsingAsmParser() const { return false; } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 73c5ad1dd7ac..79834554a50d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11190,6 +11190,9 @@ class Sema final { /// Called on well-formed 'capture' clause. OMPClause *ActOnOpenMPCaptureClause(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed 'compare' clause. 
+ OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed 'seq_cst' clause. OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc); @@ -12784,7 +12787,7 @@ class Sema final { bool SemaBuiltinElementwiseMath(CallExpr *TheCall); bool PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall); - bool SemaBuiltinReduceMath(CallExpr *TheCall); + bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall); // Matrix builtin handling. ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 58bd7b6a4a8c..008b703d4c1a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9272,7 +9272,7 @@ void getIntersectionOfProtocols(ASTContext &Context, // Remove any implied protocols from the list of inherited protocols. if (!ImpliedProtocols.empty()) { llvm::erase_if(IntersectionSet, [&](ObjCProtocolDecl *proto) -> bool { - return ImpliedProtocols.count(proto) > 0; + return ImpliedProtocols.contains(proto); }); } diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index 86879b8c3533..ae585def4d07 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -744,11 +744,18 @@ void JSONNodeDumper::VisitNamedDecl(const NamedDecl *ND) { JOS.attribute("name", ND->getNameAsString()); // FIXME: There are likely other contexts in which it makes no sense to ask // for a mangled name. - if (!isa(ND->getDeclContext())) { - std::string MangledName = ASTNameGen.getName(ND); - if (!MangledName.empty()) - JOS.attribute("mangledName", MangledName); - } + if (isa(ND->getDeclContext())) + return; + + // Mangled names are not meaningful for locals, and may not be well-defined + // in the case of VLAs. 
+ auto *VD = dyn_cast(ND); + if (VD && VD->hasLocalStorage()) + return; + + std::string MangledName = ASTNameGen.getName(ND); + if (!MangledName.empty()) + JOS.attribute("mangledName", MangledName); } } diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 56e140f10710..1bd049b88005 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -126,6 +126,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -217,6 +218,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -1792,6 +1794,10 @@ void OMPClausePrinter::VisitOMPCaptureClause(OMPCaptureClause *) { OS << "capture"; } +void OMPClausePrinter::VisitOMPCompareClause(OMPCompareClause *) { + OS << "compare"; +} + void OMPClausePrinter::VisitOMPSeqCstClause(OMPSeqCstClause *) { OS << "seq_cst"; } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 4339c249e027..09853e0f0e49 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -551,6 +551,8 @@ void OMPClauseProfiler::VisitOMPUpdateClause(const OMPUpdateClause *) {} void OMPClauseProfiler::VisitOMPCaptureClause(const OMPCaptureClause *) {} +void OMPClauseProfiler::VisitOMPCompareClause(const OMPCompareClause *) {} + void OMPClauseProfiler::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseProfiler::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/clang/lib/ASTMatchers/Dynamic/Marshallers.h index 783fb203c408..fa9d42247e24 100644 --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.h +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.h @@ -1035,7 +1035,6 @@ class MapAnyOfBuilderDescriptor : public MatcherDescriptor { void getArgKinds(ASTNodeKind ThisKind, unsigned, std::vector &ArgKinds) const override { ArgKinds.push_back(ArgKind::MakeNodeArg(ThisKind)); - return; } bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity = nullptr, ASTNodeKind *LeastDerivedKind = nullptr) const override { diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp index c6a77bb6c2e0..cab1476acf94 100644 --- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp @@ -645,7 +645,7 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, Tokenizer->SkipNewlines(); { - ScopedContextEntry SCE(this, Ctor ? 
*Ctor : nullptr); + ScopedContextEntry SCE(this, Ctor.getValueOr(nullptr)); while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index abf65e3efce9..9ef3b5b6277a 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1820,8 +1820,6 @@ void CFGBuilder::addScopesEnd(LocalScope::const_iterator B, for (VarDecl *VD : llvm::reverse(DeclsWithEndedScope)) appendScopeEnd(Block, VD, S); - - return; } /// addAutomaticObjDtors - Add to current block automatic objects destructors diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 9e74e05bd863..1761c6d3d89b 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -163,6 +163,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -428,6 +429,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Basic/Targets/Hexagon.cpp b/clang/lib/Basic/Targets/Hexagon.cpp index 9c37dee7e89a..161369242926 100644 --- a/clang/lib/Basic/Targets/Hexagon.cpp +++ b/clang/lib/Basic/Targets/Hexagon.cpp @@ -68,6 +68,9 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts, } else if (CPU == "hexagonv68") { Builder.defineMacro("__HEXAGON_V68__"); Builder.defineMacro("__HEXAGON_ARCH__", "68"); + } else if (CPU == "hexagonv69") { + Builder.defineMacro("__HEXAGON_V69__"); + Builder.defineMacro("__HEXAGON_ARCH__", "69"); } if (hasFeature("hvx-length64b")) { @@ -128,6 +131,10 @@ bool HexagonTargetInfo::handleTargetFeatures(std::vector &Features, else if (F == "+audio") HasAudio = true; } + if (CPU.compare("hexagonv68") >= 0) { + HasLegalHalfType = true; + HasFloat16 = true; + } return true; } @@ -214,7 +221,7 @@ static constexpr CPUSuffix Suffixes[] = { {{"hexagonv60"}, {"60"}}, {{"hexagonv62"}, {"62"}}, {{"hexagonv65"}, {"65"}}, {{"hexagonv66"}, {"66"}}, {{"hexagonv67"}, {"67"}}, {{"hexagonv67t"}, {"67t"}}, - {{"hexagonv68"}, {"68"}}, + {{"hexagonv68"}, {"68"}}, {{"hexagonv69"}, {"69"}}, }; const char *HexagonTargetInfo::getHexagonCPUSuffix(StringRef Name) { diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index c3c61ed443ca..7f7b44b658eb 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -623,14 +623,11 @@ void PPCTargetInfo::addP10SpecificFeatures( Features["pcrelative-memops"] = true; Features["prefix-instrs"] = true; Features["isa-v31-instructions"] = true; - return; } // Add features specific to the "Future" CPU. 
void PPCTargetInfo::addFutureSpecificFeatures( - llvm::StringMap &Features) const { - return; -} + llvm::StringMap &Features) const {} bool PPCTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 5e16d3525b38..bacac0a20d4d 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -603,8 +603,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI; - Options.ValueTrackingVariableLocations = - CodeGenOpts.ValueTrackingVariableLocations; Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; Options.LoopAlignment = CodeGenOpts.LoopAlignment; diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 5639abf56982..7c9f41e84eaf 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -258,6 +258,21 @@ class CGBuilderTy : public CGBuilderBaseTy { Addr.getAlignment().alignmentAtOffset(Index * EltSize)); } + /// Create GEP with single dynamic index. The address alignment is reduced + /// according to the element size. + using CGBuilderBaseTy::CreateGEP; + Address CreateGEP(Address Addr, llvm::Value *Index, + const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType())); + + return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), Index, + Name), + Addr.getElementType(), + Addr.getAlignment().alignmentOfArrayElement(EltSize)); + } + /// Given a pointer to i8, adjust it by a given constant offset. Address CreateConstInBoundsByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name = "") { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2cbc8f77bd39..1982b40ff667 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3211,6 +3211,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_reduce_xor: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + llvm::Intrinsic::vector_reduce_xor, Op0, nullptr, "rdx.xor"); + return RValue::get(Result); + } + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs(); Value *MatValue = EmitScalarExpr(E->getArg(0)); @@ -18589,6 +18596,7 @@ getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) { CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0) + // Legacy builtins that take a vector in place of a vector predicate. 
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64) @@ -18726,6 +18734,27 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractValue(Result, 0); } + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: { + SmallVector Ops; + const Expr *PredOp = E->getArg(0); + // There will be an implicit cast to a boolean vector. Strip it. + if (auto *Cast = dyn_cast(PredOp)) { + if (Cast->getCastKind() == CK_BitCast) + PredOp = Cast->getSubExpr(); + Ops.push_back(V2Q(EmitScalarExpr(PredOp))); + } + for (int i = 1, e = E->getNumArgs(); i != e; ++i) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: @@ -18762,40 +18791,6 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); case Hexagon::BI__builtin_brev_ldd: return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); - - default: { - if (ID == Intrinsic::not_intrinsic) - return nullptr; - - auto IsVectorPredTy = [](llvm::Type *T) { - return T->isVectorTy() && - cast(T)->getElementType()->isIntegerTy(1); - }; - - llvm::Function *IntrFn = CGM.getIntrinsic(ID); - llvm::FunctionType *IntrTy = IntrFn->getFunctionType(); - SmallVector Ops; - for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) { - llvm::Type *T = IntrTy->getParamType(i); - const Expr *A = E->getArg(i); - if (IsVectorPredTy(T)) { - // There will be an implicit cast to a boolean vector. Strip it. - if (auto *Cast = dyn_cast(A)) { - if (Cast->getCastKind() == CK_BitCast) - A = Cast->getSubExpr(); - } - Ops.push_back(V2Q(EmitScalarExpr(A))); - } else { - Ops.push_back(EmitScalarExpr(A)); - } - } - - llvm::Value *Call = Builder.CreateCall(IntrFn, Ops); - if (IsVectorPredTy(IntrTy->getReturnType())) - Call = Q2V(Call); - - return Call; - } // default } // switch return nullptr; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b202326cf757..d70f78fea6b4 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -3469,12 +3469,19 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case TEK_Aggregate: // Do nothing; aggregrates get evaluated directly into the destination. 
break; - case TEK_Scalar: - EmitStoreOfScalar(Builder.CreateLoad(ReturnValue), - MakeNaturalAlignAddrLValue(&*AI, RetTy), - /*isInit*/ true); + case TEK_Scalar: { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + CharUnits Alignment = + CGM.getNaturalTypeAlignment(RetTy, &BaseInfo, &TBAAInfo); + Address ArgAddr(&*AI, ConvertType(RetTy), Alignment); + LValue ArgVal = + LValue::MakeAddr(ArgAddr, RetTy, getContext(), BaseInfo, TBAAInfo); + EmitStoreOfScalar( + Builder.CreateLoad(ReturnValue), ArgVal, /*isInit*/ true); break; } + } break; } diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index ca071d3d2e80..2041d2a5b4c9 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -597,6 +597,10 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); CurCoro.Data->CoroBegin = CoroBegin; + // We need to emit `get_return_object` first. According to: + // [dcl.fct.def.coroutine]p7 + // The call to get_return_object is sequenced before the call to + // initial_suspend and is invoked at most once. GetReturnObjectManager GroManager(*this, S); GroManager.EmitGroAlloca(); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 577252fdfeac..34b4951a7f72 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1309,7 +1309,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { const ConstantExpr *CE = cast(E); if (llvm::Value *Result = ConstantEmitter(*this).tryEmitConstantExpr(CE)) { QualType RetType = cast(CE->getSubExpr()->IgnoreImplicit()) - ->getCallReturnType(getContext()); + ->getCallReturnType(getContext()) + ->getPointeeType(); return MakeNaturalAlignAddrLValue(Result, RetType); } return EmitLValue(cast(E)->getSubExpr()); diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index ca4450a8cf1c..0571c498c377 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1052,13 +1052,8 @@ void CodeGenFunction::EmitNewArrayInitializer( InitListElements = cast(ILE->getType()->getAsArrayTypeUnsafe()) ->getSize().getZExtValue(); - CurPtr = - Address(Builder.CreateInBoundsGEP(CurPtr.getElementType(), - CurPtr.getPointer(), - Builder.getSize(InitListElements), - "string.init.end"), - CurPtr.getAlignment().alignmentAtOffset(InitListElements * - ElementSize)); + CurPtr = Builder.CreateConstInBoundsGEP( + CurPtr, InitListElements, "string.init.end"); // Zero out the rest, if any remain. llvm::ConstantInt *ConstNum = dyn_cast(NumElements); diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index ac26f0d4232c..b5bcf157036d 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -3915,8 +3915,8 @@ static llvm::Value *emitIsPlatformVersionAtLeast(CodeGenFunction &CGF, Args.push_back( llvm::ConstantInt::get(CGM.Int32Ty, getBaseMachOPlatformID(TT))); Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor())); - Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Min ? *Min : 0)); - Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, SMin ? 
*SMin : 0)); + Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Min.getValueOr(0))); + Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, SMin.getValueOr(0))); }; assert(!Version.empty() && "unexpected empty version"); @@ -3952,8 +3952,8 @@ CodeGenFunction::EmitBuiltinAvailable(const VersionTuple &Version) { Optional Min = Version.getMinor(), SMin = Version.getSubminor(); llvm::Value *Args[] = { llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor()), - llvm::ConstantInt::get(CGM.Int32Ty, Min ? *Min : 0), - llvm::ConstantInt::get(CGM.Int32Ty, SMin ? *SMin : 0), + llvm::ConstantInt::get(CGM.Int32Ty, Min.getValueOr(0)), + llvm::ConstantInt::get(CGM.Int32Ty, SMin.getValueOr(0)) }; llvm::Value *CallRes = diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7bd7d97da43a..e35c15421520 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -688,8 +688,6 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, // Drill down to the base element type on both arrays. const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); if (DRD) SrcAddr = CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); @@ -776,7 +774,7 @@ LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, } void ReductionCodeGen::emitAggregateInitialization( - CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, const OMPDeclareReductionDecl *DRD) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current @@ -789,7 +787,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress(CGF)); + DRD, SharedAddr); } ReductionCodeGen::ReductionCodeGen(ArrayRef Shareds, @@ -883,7 +881,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, } void ReductionCodeGen::emitInitialization( - CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref DefaultInit) { assert(SharedAddresses.size() > N && "No variable was generated"); const auto *PrivateVD = @@ -893,21 +891,15 @@ void ReductionCodeGen::emitInitialization( QualType PrivateType = PrivateVD->getType(); PrivateAddr = CGF.Builder.CreateElementBitCast( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); - QualType SharedType = SharedAddresses[N].first.getType(); - SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), - CGF.ConvertTypeForMem(SharedType)), - SharedType, SharedAddresses[N].first.getBaseInfo(), - CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { if (DRD && DRD->getInitializer()) (void)DefaultInit(CGF); - emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { (void)DefaultInit(CGF); + QualType SharedType = SharedAddresses[N].first.getType(); emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(CGF), - SharedLVal.getType()); + PrivateAddr, SharedAddr, SharedType); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, @@ -4501,10 +4493,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, std::tie(Addr, Size) = getPointerAndSize(CGF, E); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); LValue Base = CGF.MakeAddrLValue( - Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(), - AffinitiesArray.getPointer(), Idx), - AffinitiesArray.getAlignment()), - KmpTaskAffinityInfoTy); + CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); // affs[i].base_addr = &; LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); @@ -4668,12 +4657,10 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getElementType(), Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + Address DepObjAddr = CGF.Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); @@ -4709,10 +4696,7 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue &PosLVal = *Pos.get(); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, 
E->getExprLoc()); Base = CGF.MakeAddrLValue( - Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), - DependenciesArray.getPointer(), Idx), - DependenciesArray.getAlignment()), - KmpDependInfoTy); + CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); } // deps[i].base_addr = &; LValue BaseAddrLVal = CGF.EmitLValueForField( @@ -4769,12 +4753,10 @@ emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, Base.getAddress(CGF), KmpDependInfoPtrT); Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getElementType(), Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + Address DepObjAddr = CGF.Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); @@ -4830,12 +4812,10 @@ static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, Base.getTBAAInfo()); // Get number of elements in a single depobj. - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getElementType(), Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + Address DepObjAddr = CGF.Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); @@ -4847,10 +4827,7 @@ static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, ElSize, CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); - Address DepAddr = - Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), - DependenciesArray.getPointer(), Pos), - DependenciesArray.getAlignment()); + Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); // Increase pos. 
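
The simplifications in the hunks above (the affinity, dependency, and depobj arrays) all switch from hand-built Address(CreateGEP(...), Alignment) pairs to the new CGBuilderTy::CreateGEP(Address, llvm::Value *) overload added in CGBuilder.h earlier in this patch, which recomputes the alignment with alignmentOfArrayElement instead of reusing the array's base alignment. A minimal standalone sketch of that alignment rule, assuming only that the guaranteed alignment at an unknown index is the largest power of two dividing both the base alignment and the element size (an illustration, not clang's CharUnits code):

#include <cassert>
#include <cstdint>

// Sketch: alignment guaranteed for an array element at an unknown index.
// Offsets from a BaseAlign-aligned pointer are multiples of EltSize, so only
// the largest power of two dividing both values is guaranteed (clang spells
// this CharUnits::alignmentOfArrayElement).
static std::uint64_t alignmentOfArrayElement(std::uint64_t BaseAlign,
                                             std::uint64_t EltSize) {
  std::uint64_t Bits = BaseAlign | EltSize;
  return Bits & (~Bits + 1); // lowest set bit
}

int main() {
  assert(alignmentOfArrayElement(16, 12) == 4); // 16-byte array, 12-byte elts
  assert(alignmentOfArrayElement(8, 24) == 8);  // multiples keep base alignment
  assert(alignmentOfArrayElement(4, 4) == 4);
}

For example, a 16-byte aligned array of 12-byte elements only guarantees 4-byte alignment at a dynamic index, which is why the refactored call sites may carry a smaller alignment than the old code that propagated the array's own alignment to every element.
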
@@ -5932,25 +5909,20 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); - LValue OrigLVal; + Address OrigAddr = Address::invalid(); // If initializer uses initializer from declare reduction construct, emit a // pointer to the address of the original reduction item (reuired by reduction // initializer) if (RCG.usesReductionInitializer(N)) { Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); - SharedAddr = CGF.EmitLoadOfPointer( + OrigAddr = CGF.EmitLoadOfPointer( SharedAddr, CGM.getContext().VoidPtrTy.castAs()->getTypePtr()); - OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); - } else { - OrigLVal = CGF.MakeNaturalAlignAddrLValue( - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), - CGM.getContext().VoidPtrTy); } // Emit the initializer: // %0 = bitcast void* %arg to * // store , * %0 - RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, + RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, [](CodeGenFunction &) { return false; }); CGF.FinishFunction(); return Fn; @@ -12816,7 +12788,7 @@ void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); for (const auto &Pair : It->DeclToUniqueName) { const auto *VD = cast(Pair.first->getCanonicalDecl()); - if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) + if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) continue; auto I = LPCI->getSecond().find(Pair.first); assert(I != LPCI->getSecond().end() && diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index c7b69dee9160..b83ec78696d1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -162,10 +162,10 @@ class ReductionCodeGen { /// Performs aggregate initialization. /// \param N Number of reduction item in the common list. /// \param PrivateAddr Address of the corresponding private item. - /// \param SharedLVal Address of the original shared variable. + /// \param SharedAddr Address of the original shared variable. /// \param DRD Declare reduction construct used for reduction item. void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr, LValue SharedLVal, + Address PrivateAddr, Address SharedAddr, const OMPDeclareReductionDecl *DRD); public: @@ -187,10 +187,10 @@ class ReductionCodeGen { /// \param PrivateAddr Address of the corresponding private item. /// \param DefaultInit Default initialization sequence that should be /// performed if no reduction specific initialization is found. - /// \param SharedLVal Address of the original shared variable. + /// \param SharedAddr Address of the original shared variable. void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, - LValue SharedLVal, + Address SharedAddr, llvm::function_ref DefaultInit); /// Returns true if the private copy requires cleanups. 
bool needCleanups(unsigned N); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 6893ab6e2aa0..866454ddeaed 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2190,11 +2190,8 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // elemptr = ((CopyType*)(elemptrptr)) + I Address ElemPtr = Address(ElemPtrPtr, Align); ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType); - if (NumIters > 1) { - ElemPtr = Address(Bld.CreateGEP(ElemPtr.getElementType(), - ElemPtr.getPointer(), Cnt), - ElemPtr.getAlignment()); - } + if (NumIters > 1) + ElemPtr = Bld.CreateGEP(ElemPtr, Cnt); // Get pointer to location in transfer medium. // MediumPtr = &medium[warp_id] @@ -2260,11 +2257,8 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); Address TargetElemPtr = Address(TargetElemPtrVal, Align); TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType); - if (NumIters > 1) { - TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getElementType(), - TargetElemPtr.getPointer(), Cnt), - TargetElemPtr.getAlignment()); - } + if (NumIters > 1) + TargetElemPtr = Bld.CreateGEP(TargetElemPtr, Cnt); // *TargetElemPtr = SrcMediumVal; llvm::Value *SrcMediumValue = diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 2509de486671..4c11f7d67534 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -24,10 +24,13 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PrettyStackTrace.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" using namespace clang; using namespace CodeGen; @@ -375,8 +378,7 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), Ctx.getPointerType(DstType), Loc); Address TmpAddr = - CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) - .getAddress(CGF); + CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF); return TmpAddr; } @@ -1245,7 +1247,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( RedCG.emitAggregateType(*this, Count); AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), - RedCG.getSharedLValue(Count), + RedCG.getSharedLValue(Count).getAddress(*this), [&Emission](CodeGenFunction &CGF) { CGF.EmitAutoVarInit(Emission); return true; @@ -4299,10 +4301,10 @@ class CheckVarsEscapingUntiedTaskDeclContext final PrivateDecls.push_back(VD); } } - void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } - void VisitCapturedStmt(const CapturedStmt *) { return; } - void VisitLambdaExpr(const LambdaExpr *) { return; } - void VisitBlockExpr(const BlockExpr *) { return; } + void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} + void VisitCapturedStmt(const CapturedStmt *) {} + void VisitLambdaExpr(const LambdaExpr *) {} + void VisitBlockExpr(const BlockExpr *) {} void VisitStmt(const Stmt *S) { if (!S) return; @@ -4431,6 +4433,53 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( UntiedLocalVars; // Set 
proper addresses for generated private copies. OMPPrivateScope Scope(CGF); + // Generate debug info for variables present in shared clause. + if (auto *DI = CGF.getDebugInfo()) { + llvm::SmallDenseMap CaptureFields = + CGF.CapturedStmtInfo->getCaptureFields(); + llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); + if (CaptureFields.size() && ContextValue) { + unsigned CharWidth = CGF.getContext().getCharWidth(); + // The shared variables are packed together as members of structure. + // So the address of each shared variable can be computed by adding + // offset of it (within record) to the base address of record. For each + // shared variable, debug intrinsic llvm.dbg.declare is generated with + // appropriate expressions (DIExpression). + // Ex: + // %12 = load %struct.anon*, %struct.anon** %__context.addr.i + // call void @llvm.dbg.declare(metadata %struct.anon* %12, + // metadata !svar1, + // metadata !DIExpression(DW_OP_deref)) + // call void @llvm.dbg.declare(metadata %struct.anon* %12, + // metadata !svar2, + // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) + for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { + const VarDecl *SharedVar = It->first; + RecordDecl *CaptureRecord = It->second->getParent(); + const ASTRecordLayout &Layout = + CGF.getContext().getASTRecordLayout(CaptureRecord); + unsigned Offset = + Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; + (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, + CGF.Builder, false); + llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); + // Get the call dbg.declare instruction we just created and update + // its DIExpression to add offset to base address. + if (auto DDI = dyn_cast(&Last)) { + SmallVector Ops; + // Add offset to the base address if non zero. + if (Offset) { + Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); + Ops.push_back(Offset); + } + Ops.push_back(llvm::dwarf::DW_OP_deref); + auto &Ctx = DDI->getContext(); + llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops); + Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr)); + } + } + } + } llvm::SmallVector, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { @@ -5918,6 +5967,9 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; + case OMPC_compare: + // Do nothing here as we already emit an error. + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 52c54d3c7a72..b72b16cf2b5f 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -571,7 +571,6 @@ void BackendConsumer::SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &DI) { // If Loc is invalid, we still need to report the issue, it just gets no // location info. 
Diags.Report(Loc, DiagID).AddString(Message); - return; } bool diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index b437ba01c676..e6adec6948af 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -188,8 +188,8 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; CharUnits Alignment = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo); - return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo, - TBAAInfo); + Address Addr(V, ConvertTypeForMem(T), Alignment); + return LValue::MakeAddr(Addr, T, getContext(), BaseInfo, TBAAInfo); } /// Given a value of type T* that may not be to a complete object, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 481ec2d606b1..f76ce8a6400d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -459,6 +459,11 @@ class CodeGenFunction : public CodeGenTypeCache { /// Get the name of the capture helper. virtual StringRef getHelperName() const { return "__captured_stmt"; } + /// Get the CaptureFields + llvm::SmallDenseMap getCaptureFields() { + return CaptureFields; + } + private: /// The kind of captured statement being generated. CapturedRegionKind Kind; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 45bfcefd49f4..3b551ea94cc2 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -43,6 +43,7 @@ #include "ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" #include "ToolChains/RISCVToolchain.h" +#include "ToolChains/SPIRV.h" #include "ToolChains/Solaris.h" #include "ToolChains/TCE.h" #include "ToolChains/VEToolchain.h" @@ -3774,6 +3775,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, } } + // FIXME: Linking separate translation units for SPIR-V is not supported yet. + // It can be done either by LLVM IR linking before conversion of the final + // linked module to SPIR-V or external SPIR-V linkers can be used e.g. + // spirv-link. + if (C.getDefaultToolChain().getTriple().isSPIRV() && Inputs.size() > 1) { + Diag(clang::diag::warn_drv_spirv_linking_multiple_inputs_unsupported); + } + handleArguments(C, Args, Inputs, Actions); // Builder to be used to build offloading actions. @@ -3813,8 +3822,15 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Queue linker inputs. if (Phase == phases::Link) { assert(Phase == PL.back() && "linking must be final compilation step."); - LinkerInputs.push_back(Current); - Current = nullptr; + // Compilation phases are setup per language, however for SPIR-V the + // final linking phase is meaningless since the compilation phase + // produces the final binary. + // FIXME: OpenCL - we could strip linking phase out from OpenCL + // compilation phases if we could verify it is not needed by any target. + if (!C.getDefaultToolChain().getTriple().isSPIRV()) { + LinkerInputs.push_back(Current); + Current = nullptr; + } break; } @@ -4387,6 +4403,12 @@ class ToolSelector final { if (!T) return nullptr; + // Can't collapse if we don't have codegen support unless we are + // emitting LLVM IR. + bool OutputIsLLVM = types::isLLVMIR(ActionInfo[0].JA->getType()); + if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) + return nullptr; + // When using -fembed-bitcode, it is required to have the same tool (clang) // for both CompilerJA and BackendJA. 
Otherwise, combine two stages. if (EmbedBitcode) { @@ -4456,6 +4478,12 @@ class ToolSelector final { if (!T) return nullptr; + // Can't collapse if we don't have codegen support unless we are + // emitting LLVM IR. + bool OutputIsLLVM = types::isLLVMIR(ActionInfo[0].JA->getType()); + if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) + return nullptr; + if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode)) return nullptr; @@ -5479,6 +5507,10 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::ve: TC = std::make_unique(*this, Target, Args); break; + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + TC = std::make_unique(*this, Target, Args); + break; default: if (Target.getVendor() == llvm::Triple::Myriad) TC = std::make_unique(*this, Target, diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp index 5b87106b6565..f63763effaff 100644 --- a/clang/lib/Driver/Job.cpp +++ b/clang/lib/Driver/Job.cpp @@ -388,6 +388,8 @@ int CC1Command::Execute(ArrayRef> Redirects, Argv.push_back(getExecutable()); Argv.append(getArguments().begin(), getArguments().end()); Argv.push_back(nullptr); + Argv.pop_back(); // The terminating null element shall not be part of the + // slice (main() behavior). // This flag simply indicates that the program couldn't start, which isn't // applicable here. diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index ac033dd427c2..50c89aaadc18 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -260,7 +260,7 @@ bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const { Tool *ToolChain::getClang() const { if (!Clang) - Clang.reset(new tools::Clang(*this)); + Clang.reset(new tools::Clang(*this, useIntegratedBackend())); return Clang.get(); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2a3723975568..65347a38490e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7007,18 +7007,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics, options::OPT_mno_outline_atomics)) { - if (A->getOption().matches(options::OPT_moutline_atomics)) { - // Option -moutline-atomics supported for AArch64 target only. - if (!Triple.isAArch64()) { - D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt) - << Triple.getArchName(); - } else { + // Option -moutline-atomics supported for AArch64 target only. + if (!Triple.isAArch64()) { + D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt) + << Triple.getArchName() << A->getOption().getName(); + } else { + if (A->getOption().matches(options::OPT_moutline_atomics)) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+outline-atomics"); + } else { + CmdArgs.push_back("-target-feature"); + CmdArgs.push_back("-outline-atomics"); } - } else { - CmdArgs.push_back("-target-feature"); - CmdArgs.push_back("-outline-atomics"); } } else if (Triple.isAArch64() && getToolChain().IsAArch64OutlineAtomicsDefault(Args)) { @@ -7126,11 +7126,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.ClaimAllArgs(options::OPT_emit_llvm); } -Clang::Clang(const ToolChain &TC) +Clang::Clang(const ToolChain &TC, bool HasIntegratedBackend) // CAUTION! The first constructor argument ("clang") is not arbitrary, // as it is for other tools. Some operations on a Tool actually test // whether that tool is Clang based on the Tool's Name as a string. 
- : Tool("clang", "clang frontend", TC) {} + : Tool("clang", "clang frontend", TC), HasBackend(HasIntegratedBackend) {} Clang::~Clang() {} diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index d4b4988b4a8c..00e0490e069b 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -26,6 +26,10 @@ namespace tools { /// Clang compiler tool. class LLVM_LIBRARY_VISIBILITY Clang : public Tool { + // Indicates whether this instance has integrated backend using + // internal LLVM infrastructure. + bool HasBackend; + public: static const char *getBaseInputName(const llvm::opt::ArgList &Args, const InputInfo &Input); @@ -99,11 +103,12 @@ class LLVM_LIBRARY_VISIBILITY Clang : public Tool { const InputInfo &Input, const llvm::opt::ArgList &Args) const; public: - Clang(const ToolChain &TC); + Clang(const ToolChain &TC, bool HasIntegratedBackend = true); ~Clang() override; bool hasGoodDiagnostics() const override { return true; } bool hasIntegratedAssembler() const override { return true; } + bool hasIntegratedBackend() const override { return HasBackend; } bool hasIntegratedCPP() const override { return true; } bool canEmitIR() const override { return true; } diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index c08f825e0cb9..6d553791b394 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -365,5 +365,4 @@ void HIPAMDToolChain::checkTargetID( getDriver().Diag(clang::diag::err_drv_bad_target_id) << PTID.OptionalTargetID.getValue(); } - return; } diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 2ce7904ecc40..ba3040636604 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -26,8 +26,8 @@ using namespace clang; using namespace llvm::opt; // Default hvx-length for various versions. -static StringRef getDefaultHvxLength(StringRef Cpu) { - return llvm::StringSwitch(Cpu) +static StringRef getDefaultHvxLength(StringRef HvxVer) { + return llvm::StringSwitch(HvxVer) .Case("v60", "64b") .Case("v62", "64b") .Case("v65", "64b") @@ -51,42 +51,107 @@ static void handleHVXTargetFeatures(const Driver &D, const ArgList &Args, // Handle HVX warnings. handleHVXWarnings(D, Args); - // Add the +hvx* features based on commandline flags. - StringRef HVXFeature, HVXLength; - - // Handle -mhvx, -mhvx=, -mno-hvx. - if (Arg *A = Args.getLastArg(options::OPT_mno_hexagon_hvx, - options::OPT_mhexagon_hvx, - options::OPT_mhexagon_hvx_EQ)) { - if (A->getOption().matches(options::OPT_mno_hexagon_hvx)) - return; - if (A->getOption().matches(options::OPT_mhexagon_hvx_EQ)) { - HasHVX = true; - HVXFeature = Cpu = A->getValue(); - HVXFeature = Args.MakeArgString(llvm::Twine("+hvx") + HVXFeature.lower()); - } else if (A->getOption().matches(options::OPT_mhexagon_hvx)) { - HasHVX = true; - HVXFeature = Args.MakeArgString(llvm::Twine("+hvx") + Cpu); + auto makeFeature = [&Args](Twine T, bool Enable) -> StringRef { + const std::string &S = T.str(); + StringRef Opt(S); + if (Opt.endswith("=")) + Opt = Opt.drop_back(1); + if (Opt.startswith("mno-")) + Opt = Opt.drop_front(4); + else if (Opt.startswith("m")) + Opt = Opt.drop_front(1); + return Args.MakeArgString(Twine(Enable ? "+" : "-") + Twine(Opt)); + }; + + auto withMinus = [](StringRef S) -> std::string { + return "-" + S.str(); + }; + + // Drop tiny core suffix for HVX version. 
+ std::string HvxVer = + (Cpu.back() == 'T' || Cpu.back() == 't' ? Cpu.drop_back(1) : Cpu).str(); + HasHVX = false; + + // Handle -mhvx, -mhvx=, -mno-hvx. If both present, -mhvx= wins over -mhvx. + auto argOrNull = [&Args](auto FlagOn, auto FlagOff) -> Arg* { + if (Arg *A = Args.getLastArg(FlagOn, FlagOff)) { + if (A->getOption().matches(FlagOn)) + return A; } - Features.push_back(HVXFeature); + return nullptr; + }; + + Arg *HvxBareA = + argOrNull(options::OPT_mhexagon_hvx, options::OPT_mno_hexagon_hvx); + Arg *HvxVerA = + argOrNull(options::OPT_mhexagon_hvx_EQ, options::OPT_mno_hexagon_hvx); + + if (Arg *A = HvxVerA ? HvxVerA : HvxBareA) { + if (A->getOption().matches(options::OPT_mhexagon_hvx_EQ)) + HvxVer = StringRef(A->getValue()).lower(); // lower produces std:string + HasHVX = true; + Features.push_back(makeFeature(Twine("hvx") + HvxVer, true)); + } else if (Arg *A = Args.getLastArg(options::OPT_mno_hexagon_hvx)) { + // If there was an explicit -mno-hvx, add -hvx to target features. + Features.push_back(makeFeature(A->getOption().getName(), false)); } + StringRef HvxLen = getDefaultHvxLength(HvxVer); + // Handle -mhvx-length=. if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_length_EQ)) { // These flags are valid only if HVX in enabled. if (!HasHVX) - D.Diag(diag::err_drv_invalid_hvx_length); + D.Diag(diag::err_drv_needs_hvx) << withMinus(A->getOption().getName()); else if (A->getOption().matches(options::OPT_mhexagon_hvx_length_EQ)) - HVXLength = A->getValue(); + HvxLen = A->getValue(); + } + + if (HasHVX) { + StringRef L = makeFeature(Twine("hvx-length") + HvxLen.lower(), true); + Features.push_back(L); } - // Default hvx-length based on Cpu. - else if (HasHVX) - HVXLength = getDefaultHvxLength(Cpu); - - if (!HVXLength.empty()) { - HVXFeature = - Args.MakeArgString(llvm::Twine("+hvx-length") + HVXLength.lower()); - Features.push_back(HVXFeature); + + unsigned HvxVerNum; + // getAsInteger returns 'true' on error. + if (StringRef(HvxVer).drop_front(1).getAsInteger(10, HvxVerNum)) + HvxVerNum = 0; + + // Handle HVX floating point flags. + auto checkFlagHvxVersion = [&](auto FlagOn, auto FlagOff, + unsigned MinVerNum) -> Optional { + // Return an Optional: + // - None indicates a verification failure, or that the flag was not + // present in Args. + // - Otherwise the returned value is that name of the feature to add + // to Features. + Arg *A = Args.getLastArg(FlagOn, FlagOff); + if (!A) + return None; + + StringRef OptName = A->getOption().getName(); + if (A->getOption().matches(FlagOff)) + return makeFeature(OptName, false); + + if (!HasHVX) { + D.Diag(diag::err_drv_needs_hvx) << withMinus(OptName); + return None; + } + if (HvxVerNum < MinVerNum) { + D.Diag(diag::err_drv_needs_hvx_version) + << withMinus(OptName) << ("v" + std::to_string(HvxVerNum)); + return None; + } + return makeFeature(OptName, true); + }; + + if (auto F = checkFlagHvxVersion(options::OPT_mhexagon_hvx_qfloat, + options::OPT_mno_hexagon_hvx_qfloat, 68)) { + Features.push_back(*F); + } + if (auto F = checkFlagHvxVersion(options::OPT_mhexagon_hvx_ieee_fp, + options::OPT_mno_hexagon_hvx_ieee_fp, 68)) { + Features.push_back(*F); } } @@ -117,7 +182,7 @@ void hexagon::getHexagonTargetFeatures(const Driver &D, const ArgList &Args, handleHVXTargetFeatures(D, Args, Features, Cpu, HasHVX); if (HexagonToolChain::isAutoHVXEnabled(Args) && !HasHVX) - D.Diag(diag::warn_drv_vectorize_needs_hvx); + D.Diag(diag::warn_drv_needs_hvx) << "auto-vectorization"; } // Hexagon tools start. 
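
The rewritten handleHVXTargetFeatures above centralizes the option-name to target-feature translation in the makeFeature lambda: drop a trailing '=', strip the "mno-" or "m" prefix, and prepend '+' or '-'. A small self-contained sketch of that mapping (plain C++ standing in for the llvm::opt/Twine plumbing; the helper name and behaviour mirror the lambda above, the rest is illustrative):

#include <cassert>
#include <string>

// Sketch of the option-name -> target-feature mapping: drop a trailing '=',
// strip "mno-" or "m", then prefix with '+' (enable) or '-' (disable).
static std::string makeFeature(std::string Opt, bool Enable) {
  if (!Opt.empty() && Opt.back() == '=')
    Opt.pop_back();
  if (Opt.rfind("mno-", 0) == 0)
    Opt.erase(0, 4);
  else if (Opt.rfind("m", 0) == 0)
    Opt.erase(0, 1);
  return (Enable ? "+" : "-") + Opt;
}

int main() {
  assert(makeFeature("mhvx-qfloat", true) == "+hvx-qfloat");
  assert(makeFeature("mno-hvx-ieee-fp", false) == "-hvx-ieee-fp");
  // The HVX version feature is built from "hvx" + version, e.g. "+hvxv69".
  assert(makeFeature("hvxv69", true) == "+hvxv69");
  assert(makeFeature("hvx-length128b", true) == "+hvx-length128b");
}

With this mapping, -mhvx-qfloat and -mno-hvx-qfloat become the +hvx-qfloat / -hvx-qfloat target features, and checkFlagHvxVersion only emits the '+' form when HVX is enabled and the HVX version is at least v68; otherwise it reports err_drv_needs_hvx or err_drv_needs_hvx_version.
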
@@ -156,6 +221,12 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsyntax-only"); } + if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_ieee_fp, + options::OPT_mno_hexagon_hvx_ieee_fp)) { + if (A->getOption().matches(options::OPT_mhexagon_hvx_ieee_fp)) + CmdArgs.push_back("-mhvx-ieee-fp"); + } + if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { CmdArgs.push_back(Args.MakeArgString("-gpsize=" + Twine(G.getValue()))); } diff --git a/clang/lib/Driver/ToolChains/SPIRV.cpp b/clang/lib/Driver/ToolChains/SPIRV.cpp index 16e72d3c733f..50d03e79bbb0 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.cpp +++ b/clang/lib/Driver/ToolChains/SPIRV.cpp @@ -13,6 +13,7 @@ #include "clang/Driver/Options.h" using namespace clang::driver; +using namespace clang::driver::toolchains; using namespace clang::driver::tools; using namespace llvm::opt; @@ -27,7 +28,7 @@ void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T, if (Input.getType() == types::TY_PP_Asm) CmdArgs.push_back("-to-binary"); if (Output.getType() == types::TY_PP_Asm) - CmdArgs.push_back("-spirv-text"); + CmdArgs.push_back("--spirv-tools-dis"); CmdArgs.append({"-o", Output.getFilename()}); @@ -47,3 +48,25 @@ void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA, llvm_unreachable("Invalid number of input files."); constructTranslateCommand(C, *this, JA, Output, Inputs[0], {}); } + +clang::driver::Tool *SPIRVToolChain::getTranslator() const { + if (!Translator) + Translator = std::make_unique(*this); + return Translator.get(); +} + +clang::driver::Tool *SPIRVToolChain::SelectTool(const JobAction &JA) const { + Action::ActionClass AC = JA.getKind(); + return SPIRVToolChain::getTool(AC); +} + +clang::driver::Tool *SPIRVToolChain::getTool(Action::ActionClass AC) const { + switch (AC) { + default: + break; + case Action::BackendJobClass: + case Action::AssembleJobClass: + return SPIRVToolChain::getTranslator(); + } + return ToolChain::getTool(AC); +} diff --git a/clang/lib/Driver/ToolChains/SPIRV.h b/clang/lib/Driver/ToolChains/SPIRV.h index 35d0446bd8b8..229f7018e3b5 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.h +++ b/clang/lib/Driver/ToolChains/SPIRV.h @@ -41,6 +41,39 @@ class LLVM_LIBRARY_VISIBILITY Translator : public Tool { } // namespace SPIRV } // namespace tools + +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY SPIRVToolChain final : public ToolChain { + mutable std::unique_ptr Translator; + +public: + SPIRVToolChain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) + : ToolChain(D, Triple, Args) {} + + bool useIntegratedAs() const override { return true; } + bool useIntegratedBackend() const override { return false; } + + bool IsMathErrnoDefault() const override { return false; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault(const llvm::opt::ArgList &Args) const override { + return false; + } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + + clang::driver::Tool *SelectTool(const JobAction &JA) const override; + +protected: + clang::driver::Tool *getTool(Action::ActionClass AC) const override; + +private: + clang::driver::Tool *getTranslator() const; +}; + +} // namespace toolchains } // namespace driver } // namespace clang #endif diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 
0a51a53da400..505a7250572b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3725,6 +3725,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; if (Style.isCSharp()) { + if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) && + Style.BraceWrapping.AfterFunction) + return true; if (Right.is(TT_CSharpNamedArgumentColon) || Left.is(TT_CSharpNamedArgumentColon)) return false; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 3d4c1a4f903b..f652a4e7088f 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -393,11 +393,18 @@ class LineJoiner { // Try to merge a block with left brace wrapped that wasn't yet covered if (TheLine->Last->is(tok::l_brace)) { + const FormatToken *Tok = TheLine->First; bool ShouldMerge = false; - if (TheLine->First->isOneOf(tok::kw_class, tok::kw_struct)) { + if (Tok->is(tok::kw_typedef)) { + Tok = Tok->getNextNonComment(); + assert(Tok); + } + if (Tok->isOneOf(tok::kw_class, tok::kw_struct)) { ShouldMerge = !Style.BraceWrapping.AfterClass || (I[1]->First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyRecord); + } else if (Tok->is(tok::kw_enum)) { + ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else { ShouldMerge = !Style.BraceWrapping.AfterFunction || (I[1]->First->is(tok::r_brace) && diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c2ae691bfd3d..b6e55aab708f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1138,7 +1138,6 @@ void UnwrappedLineParser::parseModuleImport() { } addUnwrappedLine(); - return; } // readTokenWithJavaScriptASI reads the next token and terminates the current @@ -1439,7 +1438,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { return; case tok::kw_requires: parseRequires(); - break; + return; case tok::kw_enum: // Ignore if this is part of "template is(tok::less)) { @@ -1637,19 +1636,9 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { break; } case tok::equal: - // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType - // TT_FatArrow. They always start an expression or a child block if - // followed by a curly brace. - if (FormatTok->is(TT_FatArrow)) { - nextToken(); - if (FormatTok->is(tok::l_brace)) { - // C# may break after => if the next character is a newline. - if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) { - // calling `addUnwrappedLine()` here causes odd parsing errors. - FormatTok->MustBreakBefore = true; - } - parseChildBlock(); - } + if ((Style.isJavaScript() || Style.isCSharp()) && + FormatTok->is(TT_FatArrow)) { + tryToParseChildBlock(); break; } @@ -1725,7 +1714,7 @@ bool UnwrappedLineParser::tryToParsePropertyAccessor() { // Try to parse the property accessor: // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties Tokens->setPosition(StoredPosition); - if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true) + if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) addUnwrappedLine(); nextToken(); do { @@ -1940,18 +1929,15 @@ bool UnwrappedLineParser::tryToParseBracedList() { return true; } -bool UnwrappedLineParser::tryToParseCSharpLambda() { - // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType - // TT_FatArrow. 
They always start an expression or a child block if - // followed by a curly brace. +bool UnwrappedLineParser::tryToParseChildBlock() { + assert(Style.isJavaScript() || Style.isCSharp()); + assert(FormatTok->is(TT_FatArrow)); + // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. + // They always start an expression or a child block if followed by a curly + // brace. nextToken(); if (FormatTok->isNot(tok::l_brace)) return false; - // C# may break after => if the next character is a newline. - if (Style.BraceWrapping.AfterFunction) { - // calling `addUnwrappedLine()` here causes odd parsing errors. - FormatTok->MustBreakBefore = true; - } parseChildBlock(); return true; } @@ -1964,24 +1950,15 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssignmentExpression() inside. do { - if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) - if (tryToParseCSharpLambda()) - continue; + if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && + tryToParseChildBlock()) + continue; if (Style.isJavaScript()) { if (FormatTok->is(Keywords.kw_function) || FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { tryToParseJSFunction(); continue; } - if (FormatTok->is(TT_FatArrow)) { - nextToken(); - // Fat arrows can be followed by simple expressions or by child blocks - // in curly braces. - if (FormatTok->is(tok::l_brace)) { - parseChildBlock(); - continue; - } - } if (FormatTok->is(tok::l_brace)) { // Could be a method inside of a braced list `{a() { return 1; }}`. if (tryToParseBracedList()) @@ -1996,12 +1973,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, return !HasError; } switch (FormatTok->Tok.getKind()) { - case tok::caret: - nextToken(); - if (FormatTok->is(tok::l_brace)) { - parseChildBlock(); - } - break; case tok::l_square: if (Style.isCSharp()) parseSquare(); @@ -2093,7 +2064,7 @@ void UnwrappedLineParser::parseParens() { break; case tok::equal: if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) - tryToParseCSharpLambda(); + tryToParseChildBlock(); else nextToken(); break; diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 3c07fbf2e8d3..0c79723d50fc 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -138,7 +138,7 @@ class UnwrappedLineParser { // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint void parseCSharpGenericTypeConstraint(); bool tryToParseLambda(); - bool tryToParseCSharpLambda(); + bool tryToParseChildBlock(); bool tryToParseLambdaIntroducer(); bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 1432607204bd..31e7ea3d243d 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1154,12 +1154,12 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, // Remove any macro definitions that are explicitly ignored by the module. // They aren't supposed to affect how the module is built anyway. 
HeaderSearchOptions &HSOpts = Invocation->getHeaderSearchOpts(); - llvm::erase_if( - PPOpts.Macros, [&HSOpts](const std::pair &def) { - StringRef MacroDef = def.first; - return HSOpts.ModulesIgnoreMacros.count( - llvm::CachedHashString(MacroDef.split('=').first)) > 0; - }); + llvm::erase_if(PPOpts.Macros, + [&HSOpts](const std::pair &def) { + StringRef MacroDef = def.first; + return HSOpts.ModulesIgnoreMacros.contains( + llvm::CachedHashString(MacroDef.split('=').first)); + }); // If the original compiler invocation had -fmodule-name, pass it through. Invocation->getLangOpts()->ModuleName = diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 106da642f361..b71addd84bfd 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4518,7 +4518,7 @@ bool CompilerInvocation::CreateFromArgsImpl( // Store the command-line for using in the CodeView backend. Res.getCodeGenOpts().Argv0 = Argv0; - Res.getCodeGenOpts().CommandLineArgs = CommandLineArgs; + append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); FixupInvocation(Res, Diags, Args, DashX); diff --git a/clang/lib/Headers/hexagon_protos.h b/clang/lib/Headers/hexagon_protos.h index cdffd93bb859..2642f3c8428d 100644 --- a/clang/lib/Headers/hexagon_protos.h +++ b/clang/lib/Headers/hexagon_protos.h @@ -8003,17 +8003,6 @@ #define Q6_P_vtrunohb_PP __builtin_HEXAGON_S6_vtrunohb_ppp #endif /* __HEXAGON_ARCH___ >= 62 */ -#if __HEXAGON_ARCH__ >= 62 -/* ========================================================================== - Assembly Syntax: Vd32=vmem(Rt32):nt - C Intrinsic Prototype: HVX_Vector Q6_V_vmem_R_nt(Word32 Rt) - Instruction Type: MAPPING - Execution Slots: SLOT0123 - ========================================================================== */ - -#define Q6_V_vmem_R_nt __builtin_HEXAGON_V6_ldntnt0 -#endif /* __HEXAGON_ARCH___ >= 62 */ - #if __HEXAGON_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Pd4=!any8(vcmpb.eq(Rss32,Rtt32)) diff --git a/clang/lib/Headers/hexagon_types.h b/clang/lib/Headers/hexagon_types.h index 6958809418d8..029727cc4817 100644 --- a/clang/lib/Headers/hexagon_types.h +++ b/clang/lib/Headers/hexagon_types.h @@ -1177,37 +1177,6 @@ class HEXAGON_Vect32C { #endif /* __cplusplus */ -// V65 Silver types -#if __Q6S_ARCH__ >= 65 - // Silver vector types are 128 bytes, and pairs are 256. The vector predicate - // types are 16 bytes and 32 bytes for pairs. 
-  typedef long HEXAGON_VecPred128 __attribute__((__vector_size__(16)))
-    __attribute__((aligned(128)));
-
-  typedef long HEXAGON_VecPred256 __attribute__((__vector_size__(32)))
-    __attribute__((aligned(128)));
-
-  typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128)))
-    __attribute__((aligned(128)));
-
-  typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256)))
-    __attribute__((aligned(256)));
-
-  typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128)))
-    __attribute__((aligned(4)));
-
-  typedef long HEXAGON_UVect2048 __attribute__((__vector_size__(256)))
-    __attribute__((aligned(4)));
-
-  #define Q6S_VectorPredPair HEXAGON_VecPred256
-  #define Q6S_VectorPred HEXAGON_VecPred128
-  #define Q6S_Vector HEXAGON_Vect1024
-  #define Q6S_VectorPair HEXAGON_Vect2048
-  #define Q6S_UVector HEXAGON_UVect1024
-  #define Q6S_UVectorPair HEXAGON_UVect2048
-
-#else /* __Q6S_ARCH__ >= 65 */
-
 // V65 Vector types
 #if __HVX_ARCH__ >= 65
 #if defined __HVX__ && (__HVX_LENGTH__ == 128)
@@ -1256,7 +1225,6 @@ class HEXAGON_Vect32C {
 #endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */
 #endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */
 #endif /* __HVX_ARCH__ >= 65 */
-#endif /* __Q6S_ARCH__ >= 65 */
 
 /* Predicates */
 
diff --git a/clang/lib/Headers/hvx_hexagon_protos.h b/clang/lib/Headers/hvx_hexagon_protos.h
index 41ce7a6b93e9..7e3679a38b2c 100644
--- a/clang/lib/Headers/hvx_hexagon_protos.h
+++ b/clang/lib/Headers/hvx_hexagon_protos.h
@@ -9,7 +9,6 @@
 //===----------------------------------------------------------------------===//
 
-
 #ifndef _HVX_HEXAGON_PROTOS_H_
 #define _HVX_HEXAGON_PROTOS_H_ 1
 
@@ -28,7 +27,7 @@
    Execution Slots:       SLOT0
    ========================================================================== */
 
-#define Q6_R_vextract_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)
+#define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs)
 #endif /* __HEXAGON_ARCH___ >= 60 */
 
 #if __HVX_ARCH__ >= 60
@@ -39,7 +38,7 @@
    Execution Slots:       SLOT0123
    ========================================================================== */
 
-#define Q6_V_hi_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)
+#define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss)
 #endif /* __HEXAGON_ARCH___ >= 60 */
 
 #if __HVX_ARCH__ >= 60
@@ -50,7 +49,7 @@
    Execution Slots:       SLOT0123
    ========================================================================== */
 
-#define Q6_V_lo_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)
+#define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss)
 #endif /* __HEXAGON_ARCH___ >= 60 */
 
 #if __HVX_ARCH__ >= 60
@@ -61,7 +60,7 @@
    Execution Slots:       SLOT23
    ========================================================================== */
 
-#define Q6_V_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)
+#define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt)
 #endif /* __HEXAGON_ARCH___ >= 60 */
 
 #if __HVX_ARCH__ >= 60
@@ -72,7 +71,7 @@
    Execution Slots:       SLOT0123
    ========================================================================== */
 
-#define Q6_Q_and_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)
+#define Q6_Q_and_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
 #endif /* __HEXAGON_ARCH___ >= 60 */
 
 #if __HVX_ARCH__ >= 60
@@ -83,7 +82,7 @@
    Execution Slots:       SLOT0123
========================================================================== */ -#define Q6_Q_and_QQn __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n) +#define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -94,7 +93,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_not_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not) +#define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -105,7 +104,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_or_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or) +#define Q6_Q_or_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -116,7 +115,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_or_QQn __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n) +#define Q6_Q_or_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -127,7 +126,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vsetq_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2) +#define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -138,7 +137,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_xor_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor) +#define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -149,7 +148,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QnRIV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai) +#define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -160,7 +159,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QnRIV_nt __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai) +#define 
Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -171,7 +170,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QRIV_nt __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai) +#define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -182,7 +181,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QRIV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai) +#define Q6_vmem_QRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -193,7 +192,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuh_vabsdiff_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh) +#define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -204,7 +203,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vub_vabsdiff_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub) +#define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -215,7 +214,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuh_vabsdiff_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh) +#define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -226,7 +225,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vabsdiff_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw) +#define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -237,7 +236,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vabs_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh) +#define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -248,7 +247,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vabs_Vh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat) +#define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -259,7 +258,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vabs_Vw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw) +#define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if 
__HVX_ARCH__ >= 60 @@ -270,7 +269,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vabs_Vw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat) +#define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -281,7 +280,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vadd_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb) +#define Q6_Vb_vadd_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -292,7 +291,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vadd_WbWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv) +#define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -303,7 +302,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condacc_QnVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq) +#define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -314,7 +313,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq) +#define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -325,7 +324,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vadd_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh) +#define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -336,7 +335,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vadd_WhWh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv) +#define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -347,7 +346,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condacc_QnVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq) +#define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -358,7 +357,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq) +#define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -369,7 +368,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vh_vadd_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat) +#define Q6_Vh_vadd_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -380,7 +379,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vadd_WhWh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv) +#define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -391,7 +390,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vadd_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw) +#define Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -402,7 +401,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vadd_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh) +#define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -413,7 +412,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vadd_VubVub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat) +#define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -424,7 +423,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wub_vadd_WubWub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv) +#define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -435,7 +434,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vadd_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat) +#define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -446,7 +445,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vadd_WuhWuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv) +#define Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -457,7 +456,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vadd_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw) +#define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -468,7 +467,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw) +#define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -479,7 +478,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vadd_WwWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv) +#define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -490,7 +489,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condacc_QnVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq) +#define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -501,7 +500,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq) +#define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -512,7 +511,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat) +#define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -523,7 +522,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vadd_WwWw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv) +#define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -534,7 +533,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_valign_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb) +#define Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -545,7 +544,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_valign_VVI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi) +#define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -556,7 +555,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vand_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand) +#define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -567,7 +566,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vand_QR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) +#define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -578,7 +577,7 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_V_vandor_VQR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc) +#define Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -589,7 +588,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Q_vand_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt) +#define Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -600,7 +599,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Q_vandor_QVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc) +#define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -611,7 +610,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasl_VhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh) +#define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -622,7 +621,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasl_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv) +#define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -633,7 +632,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasl_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw) +#define Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -644,7 +643,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vaslacc_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc) +#define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -655,7 +654,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasl_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv) +#define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -666,7 +665,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh) +#define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -677,7 +676,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vasr_VhVhR_rnd_sat 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat) +#define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -688,7 +687,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VhVhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat) +#define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -699,7 +698,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VhVhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat) +#define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -710,7 +709,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv) +#define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -721,7 +720,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasr_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw) +#define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -732,7 +731,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasracc_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc) +#define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -743,7 +742,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh) +#define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -754,7 +753,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VwVwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat) +#define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -765,7 +764,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VwVwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat) +#define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -776,7 +775,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VwVwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat) +#define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ 
-787,7 +786,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasr_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv) +#define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -798,7 +797,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_equals_V __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign) +#define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -809,7 +808,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_equals_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp) +#define Q6_W_equals_W(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -820,7 +819,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vavg_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh) +#define Q6_Vh_vavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -831,7 +830,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vavg_VhVh_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd) +#define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -842,7 +841,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vavg_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub) +#define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -853,7 +852,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vavg_VubVub_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd) +#define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -864,7 +863,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vavg_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh) +#define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -875,7 +874,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vavg_VuhVuh_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd) +#define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -886,7 +885,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vavg_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw) +#define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -897,7 
+896,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vavg_VwVw_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd) +#define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -908,7 +907,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vcl0_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h) +#define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -919,7 +918,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vcl0_Vuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w) +#define Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -930,7 +929,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vcombine_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine) +#define Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -941,7 +940,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vzero __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0) +#define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)() #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -952,7 +951,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vdeal_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb) +#define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -963,7 +962,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vdeale_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w) +#define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -974,7 +973,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vdeal_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh) +#define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -985,7 +984,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vdeal_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd) +#define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -996,7 +995,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vdelta_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta) +#define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1007,7 +1006,7 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_Vh_vdmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus) +#define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1018,7 +1017,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vdmpyacc_VhVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc) +#define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1029,7 +1028,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vdmpy_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv) +#define Q6_Wh_vdmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1040,7 +1039,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vdmpyacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc) +#define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1051,7 +1050,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb) +#define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1062,7 +1061,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc) +#define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1073,7 +1072,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vdmpy_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv) +#define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1084,7 +1083,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vdmpyacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc) +#define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1095,7 +1094,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_WhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat) +#define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1106,29 +1105,29 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwWhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc) +#define 
Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat) +#define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc) +#define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1139,7 +1138,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_WhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat) +#define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1150,40 +1149,40 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwWhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc) +#define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat) +#define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc) +#define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat) +#define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1194,7 +1193,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc) +#define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1205,7 +1204,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vdsad_WuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh) +#define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1216,7 +1215,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vdsadacc_WuwWuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc) +#define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1227,7 +1226,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eq_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb) +#define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1238,7 +1237,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqand_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and) +#define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1249,7 +1248,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqor_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or) +#define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1260,7 +1259,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqxacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor) +#define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1271,7 +1270,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eq_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh) +#define Q6_Q_vcmp_eq_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1282,7 +1281,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqand_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and) +#define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1293,7 +1292,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqor_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or) +#define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1304,7 +1303,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqxacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor) +#define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1315,7 +1314,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eq_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw) +#define Q6_Q_vcmp_eq_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1326,7 +1325,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqand_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and) +#define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1337,7 +1336,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqor_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or) +#define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1348,7 +1347,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Q_vcmp_eqxacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor) +#define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1359,7 +1358,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb) +#define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1370,7 +1369,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and) +#define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1381,7 +1380,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or) +#define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1392,7 +1391,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor) +#define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1403,7 +1402,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth) +#define Q6_Q_vcmp_gt_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1414,7 +1413,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and) +#define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1425,7 +1424,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or) +#define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1436,7 +1435,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor) +#define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1447,7 +1446,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub) +#define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1458,7 +1457,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and) +#define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1469,7 +1468,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or) +#define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1480,7 +1479,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor) +#define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1491,7 +1490,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh) +#define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1502,7 +1501,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and) +#define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1513,7 
+1512,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or) +#define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1524,7 +1523,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor) +#define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1535,7 +1534,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw) +#define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1546,7 +1545,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and) +#define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1557,7 +1556,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or) +#define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1568,7 +1567,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor) +#define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1579,7 +1578,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw) +#define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1590,7 +1589,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and) +#define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1601,7 +1600,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or) +#define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1612,7 +1611,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor) +#define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1623,7 +1622,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vinsert_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr) +#define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1634,7 +1633,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vlalign_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb) +#define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1645,7 +1644,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vlalign_VVI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi) +#define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1656,7 +1655,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vlsr_VuhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh) +#define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1667,7 +1666,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vlsr_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv) +#define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1678,7 +1677,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vlsr_VuwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw) +#define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1689,7 +1688,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vlsr_VwVw 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv) +#define Q6_Vw_vlsr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1700,7 +1699,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32_VbVbR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb) +#define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1711,7 +1710,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32or_VbVbVbR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc) +#define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1722,7 +1721,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16_VbVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh) +#define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1733,7 +1732,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16or_WhVbVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc) +#define Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1744,7 +1743,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vmax_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh) +#define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1755,7 +1754,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vmax_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub) +#define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1766,7 +1765,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vmax_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh) +#define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1777,7 +1776,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vmax_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw) +#define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1788,7 +1787,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vmin_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh) +#define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1799,7 +1798,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vub_vmin_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub) +#define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1810,7 +1809,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vmin_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh) +#define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1821,7 +1820,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vmin_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw) +#define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1832,7 +1831,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus) +#define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1843,7 +1842,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpaacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc) +#define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1854,7 +1853,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv) +#define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1865,7 +1864,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubWub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv) +#define Q6_Wh_vmpa_WubWub(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1876,7 +1875,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpa_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb) +#define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1887,7 +1886,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpaacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc) +#define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1898,7 +1897,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus) +#define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if 
__HVX_ARCH__ >= 60 @@ -1909,7 +1908,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpyacc_WhVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc) +#define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1920,7 +1919,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpy_VubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv) +#define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1931,7 +1930,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpyacc_WhVubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc) +#define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1942,7 +1941,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpy_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv) +#define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1953,7 +1952,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpyacc_WhVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc) +#define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1964,7 +1963,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpye_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh) +#define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1975,7 +1974,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpy_VhRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh) +#define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1986,29 +1985,29 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc) +#define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpy_VhRh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs) +#define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpy_VhRh_s1_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss) +#define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2019,7 +2018,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpy_VhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus) +#define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2030,7 +2029,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc) +#define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2041,7 +2040,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpy_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv) +#define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2052,18 +2051,18 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc) +#define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpy_VhVh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs) +#define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2074,7 +2073,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyieo_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh) +#define Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2085,7 +2084,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyieacc_VwVwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc) +#define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2096,7 +2095,7 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_Vw_vmpyie_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh) +#define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2107,7 +2106,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyieacc_VwVwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc) +#define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2118,7 +2117,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyi_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih) +#define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2129,7 +2128,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyiacc_VhVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc) +#define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2140,7 +2139,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyi_VhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb) +#define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2151,7 +2150,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyiacc_VhVhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc) +#define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2162,7 +2161,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyio_VwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh) +#define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2173,7 +2172,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyi_VwRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb) +#define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2184,7 +2183,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyiacc_VwVwRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc) +#define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2195,7 +2194,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyi_VwRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh) +#define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt) #endif 
/* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2206,7 +2205,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyiacc_VwVwRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc) +#define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2217,7 +2216,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyo_VwVh_s1_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh) +#define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2228,7 +2227,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyo_VwVh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd) +#define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2239,7 +2238,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc) +#define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2250,7 +2249,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc) +#define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2261,7 +2260,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpy_VubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub) +#define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2272,7 +2271,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpyacc_WuhVubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc) +#define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2283,7 +2282,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpy_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv) +#define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2294,7 +2293,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpyacc_WuhVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc) +#define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2305,7 +2304,7 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_Wuw_vmpy_VuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh) +#define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2316,7 +2315,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpyacc_WuwVuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc) +#define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2327,7 +2326,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpy_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv) +#define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2338,7 +2337,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpyacc_WuwVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc) +#define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2349,7 +2348,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vmux_QVV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux) +#define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2360,7 +2359,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vnavg_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh) +#define Q6_Vh_vnavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2371,7 +2370,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vnavg_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub) +#define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2382,7 +2381,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vnavg_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw) +#define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2393,7 +2392,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vnormamt_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth) +#define Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2404,7 +2403,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vnormamt_Vw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw) +#define Q6_Vw_vnormamt_Vw(Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2415,7 +2414,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vnot_V __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot) +#define Q6_V_vnot_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2426,7 +2425,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vor_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor) +#define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2437,7 +2436,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vpacke_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb) +#define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2448,7 +2447,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpacke_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh) +#define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2459,7 +2458,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vpack_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat) +#define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2470,7 +2469,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vpack_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat) +#define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2481,7 +2480,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vpacko_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob) +#define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2492,7 +2491,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpacko_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh) +#define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2503,7 +2502,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpack_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat) +#define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2514,7 +2513,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vpack_VwVw_sat 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat) +#define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2525,7 +2524,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpopcount_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth) +#define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2536,7 +2535,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vrdelta_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta) +#define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2547,7 +2546,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus) +#define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2558,7 +2557,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpyacc_VwVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc) +#define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2569,7 +2568,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vrmpy_WubRbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi) +#define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2580,7 +2579,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vrmpyacc_WwWubRbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc) +#define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2591,18 +2590,18 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpy_VubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv) +#define Q6_Vw_vrmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpyacc_VwVubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc) +#define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2613,18 +2612,18 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_Vw_vrmpy_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv) +#define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpyacc_VwVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc) +#define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2635,7 +2634,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpy_VubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub) +#define Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2646,7 +2645,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpyacc_VuwVubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc) +#define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2657,7 +2656,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrmpy_WubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi) +#define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2668,7 +2667,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrmpyacc_WuwWubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc) +#define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2679,18 +2678,18 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpy_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv) +#define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpyacc_VuwVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc) +#define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2701,7 +2700,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_V_vror_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror) +#define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2712,7 +2711,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vround_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb) +#define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2723,7 +2722,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vround_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub) +#define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2734,7 +2733,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vround_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh) +#define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2745,7 +2744,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vround_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh) +#define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2756,7 +2755,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrsad_WubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi) +#define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2767,7 +2766,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrsadacc_WuwWubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc) +#define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2778,7 +2777,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vsat_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub) +#define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2789,7 +2788,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vsat_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh) +#define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2800,7 +2799,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vsxt_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb) +#define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2811,7 +2810,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vsxt_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh) +#define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2822,7 +2821,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vshuffe_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh) +#define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2833,7 +2832,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vshuff_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb) +#define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2844,7 +2843,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vshuffe_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb) +#define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2855,7 +2854,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vshuff_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh) +#define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2866,7 +2865,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vshuffo_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob) +#define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2877,7 +2876,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vshuff_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd) +#define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2888,7 +2887,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vshuffoe_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb) +#define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2899,7 +2898,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vshuffoe_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh) +#define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2910,7 +2909,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vshuffo_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh) +#define Q6_Vh_vshuffo_VhVh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2921,7 +2920,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vsub_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb) +#define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2932,7 +2931,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vsub_WbWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv) +#define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2943,7 +2942,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condnac_QnVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq) +#define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2954,7 +2953,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condnac_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq) +#define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2965,7 +2964,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vsub_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh) +#define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2976,7 +2975,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vsub_WhWh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv) +#define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2987,7 +2986,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condnac_QnVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq) +#define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2998,7 +2997,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condnac_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq) +#define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3009,7 +3008,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vsub_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat) +#define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv) #endif 
/* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3020,7 +3019,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vsub_WhWh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv) +#define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3031,7 +3030,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vsub_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw) +#define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3042,7 +3041,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vsub_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh) +#define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3053,7 +3052,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vsub_VubVub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat) +#define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3064,7 +3063,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wub_vsub_WubWub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv) +#define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3075,7 +3074,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vsub_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat) +#define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3086,7 +3085,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vsub_WuhWuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv) +#define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3097,7 +3096,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vsub_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw) +#define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3108,7 +3107,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsub_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw) +#define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3119,7 +3118,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vsub_WwWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv) 
+#define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3130,7 +3129,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condnac_QnVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq) +#define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3141,7 +3140,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condnac_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq) +#define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3152,7 +3151,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsub_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat) +#define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3163,7 +3162,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vsub_WwWw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv) +#define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3174,7 +3173,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vswap_QVV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap) +#define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3185,7 +3184,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpy_WbRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb) +#define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3196,7 +3195,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpyacc_WhWbRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc) +#define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3207,7 +3206,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpy_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus) +#define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3218,7 +3217,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpyacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc) +#define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3229,7 +3228,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vtmpy_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb) +#define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3240,7 +3239,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vtmpyacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc) +#define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3251,7 +3250,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vunpack_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb) +#define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3262,7 +3261,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vunpack_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh) +#define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3273,7 +3272,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vunpackoor_WhVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob) +#define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3284,7 +3283,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vunpackoor_WwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh) +#define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3295,7 +3294,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vunpack_Vub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub) +#define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3306,7 +3305,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vunpack_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh) +#define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3317,7 +3316,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vxor_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor) +#define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3328,7 +3327,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vzxt_Vub 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb) +#define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3339,7 +3338,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vzxt_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh) +#define Q6_Wuw_vzxt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 62 @@ -3350,7 +3349,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vb_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb) +#define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3361,7 +3360,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath) +#define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3372,7 +3371,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vsetq2_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2) +#define Q6_Q_vsetq2_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3383,7 +3382,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Qb_vshuffe_QhQh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh) +#define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3394,7 +3393,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Qh_vshuffe_QwQw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw) +#define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3405,7 +3404,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vadd_VbVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat) +#define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3416,7 +3415,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vadd_WbWb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv) +#define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3427,7 +3426,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vw_vadd_VwVwQ_carry __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry) +#define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3438,7 +3437,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vadd_vclb_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh) +#define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3449,7 +3448,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_vclb_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw) +#define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3460,7 +3459,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vaddacc_WwVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc) +#define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3471,7 +3470,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vaddacc_WhVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc) +#define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3482,7 +3481,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vadd_VubVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat) +#define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3493,7 +3492,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vaddacc_WwVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc) +#define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3504,7 +3503,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vadd_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat) +#define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3515,7 +3514,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vadd_WuwWuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv) +#define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3526,7 +3525,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vand_QnR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt) +#define 
Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3537,7 +3536,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vandor_VQnR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc) +#define Q6_V_vandor_VQnR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3548,7 +3547,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vand_QnV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv) +#define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3559,7 +3558,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vand_QV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv) +#define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3570,7 +3569,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vasr_VhVhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat) +#define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3581,7 +3580,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VuwVuwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat) +#define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3592,7 +3591,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VwVwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat) +#define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3603,7 +3602,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vlsr_VubR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb) +#define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3614,7 +3613,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32_VbVbR_nomatch __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm) +#define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3625,7 +3624,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32or_VbVbVbI 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci) +#define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3636,7 +3635,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32_VbVbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi) +#define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3647,7 +3646,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16_VbVhR_nomatch __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm) +#define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3658,7 +3657,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16or_WhVbVhI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci) +#define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3669,7 +3668,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16_VbVhI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi) +#define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3680,7 +3679,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vmax_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb) +#define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3691,7 +3690,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vmin_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb) +#define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3702,7 +3701,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpa_WuhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb) +#define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3713,7 +3712,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpaacc_WwWuhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc) +#define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3724,7 +3723,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_W_vmpye_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64) +#define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if 
__HVX_ARCH__ >= 62 @@ -3735,7 +3734,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyi_VwRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub) +#define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3746,7 +3745,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyiacc_VwVwRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc) +#define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3757,7 +3756,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_W_vmpyoacc_WVwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc) +#define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3768,7 +3767,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vround_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub) +#define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3779,7 +3778,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vround_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh) +#define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3790,7 +3789,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vsat_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh) +#define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3801,7 +3800,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vsub_VbVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat) +#define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3812,7 +3811,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vsub_WbWb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv) +#define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3823,7 +3822,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsub_VwVwQ_carry __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry) +#define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3834,7 +3833,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vsub_VubVb_sat 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat) +#define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3845,7 +3844,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vsub_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat) +#define Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3856,7 +3855,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vsub_WuwWuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv) +#define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 65 @@ -3867,7 +3866,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vabs_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb) +#define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3878,7 +3877,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vabs_Vb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat) +#define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3889,7 +3888,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vaslacc_VhVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc) +#define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3900,7 +3899,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasracc_VhVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc) +#define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3911,7 +3910,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VuhVuhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat) +#define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3922,7 +3921,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VuhVuhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat) +#define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3933,7 +3932,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VuwVuwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat) +#define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 
*/ #if __HVX_ARCH__ >= 65 @@ -3944,7 +3943,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vavg_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb) +#define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3955,7 +3954,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vavg_VbVb_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd) +#define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3966,7 +3965,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vavg_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw) +#define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3977,7 +3976,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vavg_VuwVuw_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd) +#define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3988,7 +3987,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vzero __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0) +#define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)() #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3999,7 +3998,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_ARMVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh) +#define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4010,7 +4009,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_AQRMVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq) +#define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4021,7 +4020,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_ARMWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw) +#define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4032,7 +4031,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_AQRMWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq) +#define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4043,7 +4042,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define 
Q6_vgather_ARMVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw) +#define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4054,7 +4053,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_AQRMVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq) +#define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4065,7 +4064,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vlut4_VuhPh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4) +#define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4076,7 +4075,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu) +#define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4087,7 +4086,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpaacc_WhWubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc) +#define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4098,7 +4097,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vmpa_VhVhVhPh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat) +#define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4109,7 +4108,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vmpa_VhVhVuhPuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat) +#define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4120,7 +4119,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vmps_VhVhVuhPuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat) +#define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4131,7 +4130,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc) +#define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4142,7 +4141,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vmpye_VuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe) +#define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4153,7 +4152,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vmpyeacc_VuwVuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc) +#define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4164,7 +4163,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vnavg_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb) +#define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4175,7 +4174,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb) +#define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4186,7 +4185,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh) +#define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4197,7 +4196,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw) +#define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4208,7 +4207,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_RMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh) +#define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4219,7 +4218,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatteracc_RMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add) +#define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4230,7 +4229,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_QRMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq) +#define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4241,7 +4240,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_RMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw) +#define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4252,7 +4251,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatteracc_RMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add) +#define Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4263,7 +4262,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_QRMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq) +#define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4274,7 +4273,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_RMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw) +#define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4285,7 +4284,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatteracc_RMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add) +#define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4296,7 +4295,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_QRMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq) +#define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 66 @@ -4307,7 +4306,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVwQ_carry_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat) +#define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1)) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 @@ -4318,7 +4317,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vasrinto_WwVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into) +#define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 @@ -4329,7 +4328,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vrotr_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr) +#define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 @@ -4340,7 +4339,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsatdw_VwVw 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw) +#define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 68 @@ -4351,7 +4350,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpy_WubWbI_h __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10) +#define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 @@ -4362,7 +4361,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpyacc_WwWubWbI_h __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx) +#define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 @@ -4373,7 +4372,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpy_WubWbI_v __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10) +#define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 @@ -4384,9 +4383,801 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpyacc_WwWubWbI_v __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx) +#define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vabs(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vabs(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vabs_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vadd(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vadd(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + 
========================================================================== */ + +#define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vadd(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vadd(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vadd(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector 
Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.w=vfmv(Vu32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vfmv_Vw(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=Vu32.qf16 + C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=Vuu32.qf32 + C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Wqf32(HVX_VectorPair Vuu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=Vu32.qf32 + C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vqf32(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.b=vcvt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.h=vcvt(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt(Vu32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + 
========================================================================== */ + +#define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vcvt(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vcvt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt(Vu32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vcvt(Vu32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vcvt(Vu32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vcvt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vcvt(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuh_vcvt_Vhf(Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vdmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vfmax(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vfmax(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vfmin(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vfmin(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vfneg(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + 
========================================================================== */ + +#define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vfneg(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + 
========================================================================== */ + +#define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vmax(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vmax(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vmin(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + 
Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vmin(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vx32.hf+=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32) + C 
Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vmpy(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv) +#endif /* __HEXAGON_ARCH___ 
>= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vmpy(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vsub(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vsub(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vsub(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + 
Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16 + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmpy_VuhVuh_rs16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + #endif /* __HVX__ */ #endif diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 613ad742c93f..300b022d83b9 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3192,6 +3192,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 840b3daae63c..59601c5ce79d 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -886,7 +886,6 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, diag::warn_maybe_capture_bad_target_this_ptr, Callee, *this); } - return; } void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index fb99591656d3..4e83fa1fffca 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2216,10 +2216,38 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; case Builtin::BI__builtin_reduce_max: - case Builtin::BI__builtin_reduce_min: - if (SemaBuiltinReduceMath(TheCall)) + case Builtin::BI__builtin_reduce_min: { + if (PrepareBuiltinReduceMathOneArgCall(TheCall)) return ExprError(); + + const Expr *Arg = TheCall->getArg(0); + const auto *TyA = Arg->getType()->getAs(); + if (!TyA) { + Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector ty*/ 4 << Arg->getType(); + return ExprError(); + } + + TheCall->setType(TyA->getElementType()); break; + } + + // __builtin_reduce_xor supports vector of integers only. 
+ case Builtin::BI__builtin_reduce_xor: { + if (PrepareBuiltinReduceMathOneArgCall(TheCall)) + return ExprError(); + + const Expr *Arg = TheCall->getArg(0); + const auto *TyA = Arg->getType()->getAs(); + if (!TyA || !TyA->getElementType()->isIntegerType()) { + Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector of integers */ 6 << Arg->getType(); + return ExprError(); + } + TheCall->setType(TyA->getElementType()); + break; + } + case Builtin::BI__builtin_matrix_transpose: return SemaBuiltinMatrixTranspose(TheCall, TheCallResult); @@ -16882,7 +16910,7 @@ bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) { return false; } -bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) { +bool Sema::PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall) { if (checkArgCount(*this, TheCall, 1)) return true; @@ -16891,14 +16919,6 @@ bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) { return true; TheCall->setArg(0, A.get()); - const VectorType *TyA = A.get()->getType()->getAs(); - if (!TyA) { - SourceLocation ArgLoc = TheCall->getArg(0)->getBeginLoc(); - return Diag(ArgLoc, diag::err_builtin_invalid_arg_type) - << 1 << /* vector ty*/ 4 << A.get()->getType(); - } - - TheCall->setType(TyA->getElementType()); return false; } diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 083a67db7a91..93c07ccc891f 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -569,7 +569,6 @@ void PreferredTypeBuilder::enterMemAccess(Sema &S, SourceLocation Tok, return; // Keep the expected type, only update the location. ExpectedLoc = Tok; - return; } void PreferredTypeBuilder::enterUnary(Sema &S, SourceLocation Tok, diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 27ba49fb001c..e89cecd08cca 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -237,9 +237,9 @@ static bool isValidCoroutineContext(Sema &S, SourceLocation Loc, // placeholder type shall not be a coroutine." if (FD->getReturnType()->isUndeducedType()) DiagInvalid(DiagAutoRet); - // [dcl.fct.def.coroutine]p1: "The parameter-declaration-clause of the - // coroutine shall not terminate with an ellipsis that is not part of a - // parameter-declaration." + // [dcl.fct.def.coroutine]p1 + // The parameter-declaration-clause of the coroutine shall not terminate with + // an ellipsis that is not part of a parameter-declaration. if (FD->isVariadic()) DiagInvalid(DiagVarargs); @@ -579,8 +579,12 @@ VarDecl *Sema::buildCoroutinePromise(SourceLocation Loc) { /*TopLevelOfInitList=*/false, /*TreatUnavailableAsInvalid=*/false); - // Attempt to initialize the promise type with the arguments. - // If that fails, fall back to the promise type's default constructor. + // [dcl.fct.def.coroutine]5.7 + // promise-constructor-arguments is determined as follows: overload + // resolution is performed on a promise constructor call created by + // assembling an argument list q_1 ... q_n . If a viable constructor is + // found ([over.match.viable]), then promise-constructor-arguments is ( q_1 + // , ..., q_n ), otherwise promise-constructor-arguments is empty. 
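As a minimal illustrative sketch of the constructor selection quoted above (not part of this patch; the `task` and `promise_type` names below are hypothetical and stand for any coroutine return type):

  #include <coroutine>
  struct task {
    struct promise_type {
      // Viable for `task f(int n)` below, so promise-constructor-arguments is (n).
      promise_type(int n) {}
      // Fallback used when overload resolution over q_1 ... q_n finds no viable constructor.
      promise_type() = default;
      task get_return_object() { return {}; }
      std::suspend_never initial_suspend() { return {}; }
      std::suspend_never final_suspend() noexcept { return {}; }
      void return_void() {}
      void unhandled_exception() {}
    };
  };
  task f(int n) { co_return; } // the frame's promise is constructed as promise_type(n)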
if (InitSeq) { ExprResult Result = InitSeq.Perform(*this, Entity, Kind, CtorArgExprs); if (Result.isInvalid()) { @@ -648,6 +652,10 @@ static void checkNoThrow(Sema &S, const Stmt *E, return; } if (ThrowingDecls.empty()) { + // [dcl.fct.def.coroutine]p15 + // The expression co_await promise.final_suspend() shall not be + // potentially-throwing ([except.spec]). + // + // First time seeing an error, emit the error message. S.Diag(cast<FunctionDecl>(S.CurContext)->getLocation(), diag::err_coroutine_promise_final_suspend_requires_nothrow); @@ -995,9 +1003,8 @@ static Expr *buildStdNoThrowDeclRef(Sema &S, SourceLocation Loc) { LookupResult Result(S, &S.PP.getIdentifierTable().get("nothrow"), Loc, Sema::LookupOrdinaryName); if (!S.LookupQualifiedName(Result, Std)) { - // FIXME: should have been included already. - // If we require it to include then this diagnostic is no longer - // needed. + // <coroutine> is not required to include <new>, so we cannot omit + // the check here. S.Diag(Loc, diag::err_implicit_coroutine_std_nothrow_type_not_found); return nullptr; } @@ -1029,9 +1036,21 @@ static FunctionDecl *findDeleteForPromise(Sema &S, SourceLocation Loc, auto *PointeeRD = PromiseType->getAsCXXRecordDecl(); assert(PointeeRD && "PromiseType must be a CxxRecordDecl type"); + // [dcl.fct.def.coroutine]p12 + // The deallocation function's name is looked up by searching for it in the + // scope of the promise type. If nothing is found, a search is performed in + // the global scope. if (S.FindDeallocationFunction(Loc, PointeeRD, DeleteName, OperatorDelete)) return nullptr; + + // FIXME: We have not implemented the following selection: + // [dcl.fct.def.coroutine]p12 + // If both a usual deallocation function with only a pointer parameter and a + // usual deallocation function with both a pointer parameter and a size + // parameter are found, then the selected deallocation function shall be the + // one with two parameters. Otherwise, the selected deallocation function + // shall be the function with one parameter. + if (!OperatorDelete) { // Look for a global declaration. const bool CanProvideSize = S.isCompleteType(Loc, PromiseType); @@ -1062,8 +1081,8 @@ void Sema::CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body) { return; } - // Coroutines [stmt.return]p1: - // A return statement shall not appear in a coroutine. + // [stmt.return.coroutine]p1: + // A coroutine shall not enclose a return statement ([stmt.return]). if (Fn->FirstReturnLoc.isValid()) { assert(Fn->FirstCoroutineStmtLoc.isValid() && "first coroutine location not set"); @@ -1164,12 +1183,15 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() { assert(!IsPromiseDependentType && "cannot make statement while the promise type is dependent"); - // [dcl.fct.def.coroutine]/8 - // The unqualified-id get_return_object_on_allocation_failure is looked up in - // the scope of class P by class member access lookup (3.4.5). ... - // If an allocation function returns nullptr, ... the coroutine return value - // is obtained by a call to ... get_return_object_on_allocation_failure(). - + // [dcl.fct.def.coroutine]p10 + // If a search for the name get_return_object_on_allocation_failure in + // the scope of the promise type ([class.member.lookup]) finds any + // declarations, then the result of a call to an allocation function used to + // obtain storage for the coroutine state is assumed to return nullptr if it + // fails to obtain storage, ... If the allocation function returns nullptr, + // ... 
and the return value is obtained by a call to + // T::get_return_object_on_allocation_failure(), where T is the + // promise type. DeclarationName DN = S.PP.getIdentifierInfo("get_return_object_on_allocation_failure"); LookupResult Found(S, DN, Loc, Sema::LookupMemberName); @@ -1215,12 +1237,11 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { const bool RequiresNoThrowAlloc = ReturnStmtOnAllocFailure != nullptr; - // [dcl.fct.def.coroutine]/7 - // Lookup allocation functions using a parameter list composed of the - // requested size of the coroutine state being allocated, followed by - // the coroutine function's arguments. If a matching allocation function - // exists, use it. Otherwise, use an allocation function that just takes - // the requested size. + // According to [dcl.fct.def.coroutine]p9, look up allocation functions using a + // parameter list composed of the requested size of the coroutine state being + // allocated, followed by the coroutine function's arguments. If a matching + // allocation function exists, use it. Otherwise, use an allocation function + // that just takes the requested size. FunctionDecl *OperatorNew = nullptr; FunctionDecl *OperatorDelete = nullptr; @@ -1228,21 +1249,32 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { bool PassAlignment = false; SmallVector PlacementArgs; - // [dcl.fct.def.coroutine]/7 - // "The allocation function's name is looked up in the scope of P. - // [...] If the lookup finds an allocation function in the scope of P, - // overload resolution is performed on a function call created by assembling - // an argument list. The first argument is the amount of space requested, - // and has type std::size_t. The lvalues p1 ... pn are the succeeding - // arguments." + // [dcl.fct.def.coroutine]p9 + // An implementation may need to allocate additional storage for a + // coroutine. + // This storage is known as the coroutine state and is obtained by calling a + // non-array allocation function ([basic.stc.dynamic.allocation]). The + // allocation function's name is looked up by searching for it in the scope of + // the promise type. + // - If any declarations are found, overload resolution is performed on a + // function call created by assembling an argument list. The first argument is + // the amount of space requested, and has type std::size_t. The + // lvalues p1 ... pn are the succeeding arguments. // // ...where "p1 ... pn" are defined earlier as: // - // [dcl.fct.def.coroutine]/3 - // "For a coroutine f that is a non-static member function, let P1 denote the - // type of the implicit object parameter (13.3.1) and P2 ... Pn be the types - // of the function parameters; otherwise let P1 ... Pn be the types of the - // function parameters. Let p1 ... pn be lvalues denoting those objects." + // [dcl.fct.def.coroutine]p3 + // The promise type of a coroutine is `std::coroutine_traits<R, P1, ..., Pn>` + // , where R is the return type of the function, and `P1, ..., Pn` are the + // sequence of types of the non-object function parameters, preceded by the + // type of the object parameter ([dcl.fct]) if the coroutine is a non-static + // member function. [dcl.fct.def.coroutine]p4 In the following, p_i is an + // lvalue of type P_i, where p1 denotes the object parameter and p_i+1 denotes + // the i-th non-object function parameter for a non-static member function, + // and p_i denotes the i-th function parameter otherwise. 
For a non-static + member function, q_1 is an lvalue that denotes *this; any other q_i is an + lvalue that denotes the parameter copy corresponding to p_i. if (auto *MD = dyn_cast<CXXMethodDecl>(&FD)) { if (MD->isInstance() && !isLambdaCallOperator(MD)) { ExprResult ThisExpr = S.ActOnCXXThis(Loc); @@ -1273,10 +1305,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { /*isArray*/ false, PassAlignment, PlacementArgs, OperatorNew, UnusedResult, /*Diagnose*/ false); - // [dcl.fct.def.coroutine]/7 - // "If no matching function is found, overload resolution is performed again - // on a function call created by passing just the amount of space required as - // an argument of type std::size_t." + // [dcl.fct.def.coroutine]p9 + // If no viable function is found ([over.match.viable]), overload resolution + // is performed again on a function call created by passing just the amount of + // space required as an argument of type std::size_t. if (!OperatorNew && !PlacementArgs.empty()) { PlacementArgs.clear(); S.FindAllocationFunctions(Loc, SourceRange(), /*NewScope*/ Sema::AFS_Class, @@ -1285,10 +1317,11 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { OperatorNew, UnusedResult, /*Diagnose*/ false); } - // [dcl.fct.def.coroutine]/7 - // "The allocation function’s name is looked up in the scope of P. If this - // lookup fails, the allocation function’s name is looked up in the global - // scope." + // [dcl.fct.def.coroutine]p9 + // The allocation function's name is looked up by searching for it in the + // scope of the promise type. + // - If any declarations are found, ... + // - Otherwise, a search is performed in the global scope. if (!OperatorNew) { S.FindAllocationFunctions(Loc, SourceRange(), /*NewScope*/ Sema::AFS_Global, /*DeleteScope*/ Sema::AFS_Both, PromiseType, @@ -1328,8 +1361,12 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { } } - if ((OperatorDelete = findDeleteForPromise(S, Loc, PromiseType)) == nullptr) + if ((OperatorDelete = findDeleteForPromise(S, Loc, PromiseType)) == nullptr) { + // FIXME: We should add an error here. According to: + // [dcl.fct.def.coroutine]p12 + // If no usual deallocation function is found, the program is ill-formed. return false; + } Expr *FramePtr = S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); @@ -1368,7 +1405,11 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { SmallVector DeleteArgs{CoroFree}; - // Check if we need to pass the size. + // [dcl.fct.def.coroutine]p12 + // The selected deallocation function shall be called with the address of + // the block of storage to be reclaimed as its first argument. If a + // deallocation function with a parameter of type std::size_t is + // used, the size of the block is passed as the corresponding argument. const auto *OpDeleteType = OpDeleteQualType.getTypePtr()->castAs<FunctionProtoType>(); if (OpDeleteType->getNumParams() > 1) @@ -1391,9 +1432,13 @@ bool CoroutineStmtBuilder::makeOnFallthrough() { assert(!IsPromiseDependentType && "cannot make statement while the promise type is dependent"); - // [dcl.fct.def.coroutine]/4 - // The unqualified-ids 'return_void' and 'return_value' are looked up in - // the scope of class P. If both are found, the program is ill-formed. + // [dcl.fct.def.coroutine]p6 + // If searches for the names return_void and return_value in the scope of + // the promise type each find any declarations, the program is ill-formed. + // [Note 1: If return_void is found, flowing off the end of a coroutine is + // equivalent to a co_return with no operand. 
Otherwise, flowing off the end + // of a coroutine results in undefined behavior ([stmt.return.coroutine]). — + // end note] bool HasRVoid, HasRValue; LookupResult LRVoid = lookupMember(S, "return_void", PromiseRecordDecl, Loc, HasRVoid); @@ -1414,18 +1459,20 @@ bool CoroutineStmtBuilder::makeOnFallthrough() { << LRValue.getLookupName(); return false; } else if (!HasRVoid && !HasRValue) { - // FIXME: The PDTS currently specifies this case as UB, not ill-formed. - // However we still diagnose this as an error since until the PDTS is fixed. - S.Diag(FD.getLocation(), - diag::err_coroutine_promise_requires_return_function) - << PromiseRecordDecl; - S.Diag(PromiseRecordDecl->getLocation(), diag::note_defined_here) - << PromiseRecordDecl; - return false; + // We need to set 'Fallthrough'. Otherwise the other analysis part might + // think the coroutine has defined a return_value method. So it might emit + // a **false** positive warning, e.g., + // + // promise_without_return_func foo() { + // co_await something(); + // } + // + // Then AnalysisBasedWarnings would emit a warning about `foo()` lacking a + // co_return statement, which isn't correct. + Fallthrough = S.ActOnNullStmt(PromiseRecordDecl->getLocation()); + if (Fallthrough.isInvalid()) + return false; } else if (HasRVoid) { - // If the unqualified-id return_void is found, flowing off the end of a - // coroutine is equivalent to a co_return with no operand. Otherwise, - // flowing off the end of a coroutine results in undefined behavior. Fallthrough = S.BuildCoreturnStmt(FD.getLocation(), nullptr, /*IsImplicit*/false); Fallthrough = S.ActOnFinishFullStmt(Fallthrough.get()); @@ -1481,8 +1528,9 @@ bool CoroutineStmtBuilder::makeOnException() { } bool CoroutineStmtBuilder::makeReturnObject() { - // Build implicit 'p.get_return_object()' expression and form initialization - // of return type from it. + // [dcl.fct.def.coroutine]p7 + // The expression promise.get_return_object() is used to initialize the + // returned reference or prvalue result object of a call to a coroutine. ExprResult ReturnObject = buildPromiseCall(S, Fn.CoroutinePromise, Loc, "get_return_object", None); if (ReturnObject.isInvalid()) @@ -1620,6 +1668,12 @@ bool Sema::buildCoroutineParameterMoves(SourceLocation Loc) { if (!ScopeInfo->CoroutineParameterMoves.empty()) return false; + // [dcl.fct.def.coroutine]p13 + // When a coroutine is invoked, after initializing its parameters + // ([expr.call]), a copy is created for each coroutine parameter. For a + // parameter of type cv T, the copy is a variable of type cv T with + // automatic storage duration that is direct-initialized from an xvalue of + // type T referring to the parameter. for (auto *PD : FD->parameters()) { if (PD->getType()->isDependentType()) continue; @@ -1636,7 +1690,9 @@ bool Sema::buildCoroutineParameterMoves(SourceLocation Loc) { CExpr = castForMoving(*this, PDRefExpr.get()); else CExpr = PDRefExpr.get(); - + // [dcl.fct.def.coroutine]p13 + // The initialization and destruction of each parameter copy occurs in the + // context of the called coroutine. 
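A short sketch of the parameter copy described above (illustrative only; it reuses the hypothetical `task` type from the earlier sketch):

  #include <string>
  // The coroutine state holds a copy of `s` that is direct-initialized from an
  // xvalue of type std::string (i.e. move-constructed), and that copy is
  // destroyed together with the coroutine state.
  task g(std::string s) { co_return; }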
auto D = buildVarDecl(*this, Loc, PD->getType(), PD->getIdentifier()); AddInitializerToDecl(D, CExpr, /*DirectInit=*/true); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d25f329f85e4..54f0242d2ca1 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1346,7 +1346,7 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, // implicitly capturing the *enclosing object* by reference (see loop // above)). assert((!ByCopy || - dyn_cast(FunctionScopes[MaxFunctionScopesIndex])) && + isa(FunctionScopes[MaxFunctionScopesIndex])) && "Only a lambda can capture the enclosing object (referred to by " "*this) by copy"); QualType ThisTy = getCurrentThisType(); diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index f497199badc1..a4b9f3c242c1 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -383,11 +383,18 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, if (!ModuleScopes.empty()) Context.addModuleInitializer(ModuleScopes.back().Module, Import); - // Re-export the module if needed. if (!ModuleScopes.empty() && ModuleScopes.back().ModuleInterface) { + // Re-export the module if the imported module is exported. + // Note that we don't need to add re-exported module to Imports field + // since `Exports` implies the module is imported already. if (ExportLoc.isValid() || getEnclosingExportDecl(Import)) getCurrentModule()->Exports.emplace_back(Mod, false); + else + getCurrentModule()->Imports.insert(Mod); } else if (ExportLoc.isValid()) { + // [module.interface]p1: + // An export-declaration shall inhabit a namespace scope and appear in the + // purview of a module interface unit. Diag(ExportLoc, diag::err_export_not_in_module_interface); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5ecfe7fca55c..ba0481874577 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6354,6 +6354,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -10754,7 +10755,6 @@ class OpenMPAtomicUpdateChecker { bool checkBinaryOperation(BinaryOperator *AtomicBinOp, unsigned DiagId = 0, unsigned NoteId = 0); }; -} // namespace bool OpenMPAtomicUpdateChecker::checkBinaryOperation( BinaryOperator *AtomicBinOp, unsigned DiagId, unsigned NoteId) { @@ -10915,6 +10915,7 @@ bool OpenMPAtomicUpdateChecker::checkStatement(Stmt *S, unsigned DiagId, } return ErrorFound != NoError; } +} // namespace StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, Stmt *AStmt, @@ -10939,7 +10940,8 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, case OMPC_read: case OMPC_write: case OMPC_update: - case OMPC_capture: { + case OMPC_capture: + case OMPC_compare: { if (AtomicKind != OMPC_unknown) { Diag(C->getBeginLoc(), diag::err_omp_atomic_several_clauses) << SourceRange(C->getBeginLoc(), C->getEndLoc()); @@ -11383,15 +11385,21 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, SourceRange(Body->getBeginLoc(), Body->getBeginLoc()); ErrorFound = NotACompoundStatement; } - if (ErrorFound != NoError) { - Diag(ErrorLoc, diag::err_omp_atomic_capture_not_compound_statement) - << ErrorRange; - Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; - return StmtError(); - } - if (CurContext->isDependentContext()) - UE = V = E = X = nullptr; } + if (ErrorFound != 
NoError) { + Diag(ErrorLoc, diag::err_omp_atomic_capture_not_compound_statement) + << ErrorRange; + Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; + return StmtError(); + } + if (CurContext->isDependentContext()) + UE = V = E = X = nullptr; + } else if (AtomicKind == OMPC_compare) { + // TODO: For now we emit an error here, and code gen for the clause is + // skipped in emitOMPAtomicExpr. + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "atomic compare is not supported for now"); + Diag(AtomicKindLoc, DiagID); } setFunctionHasBranchProtectedScope(); @@ -13472,6 +13480,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -14303,6 +14312,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -14764,6 +14774,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -15069,6 +15080,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -15257,6 +15269,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_capture: Res = ActOnOpenMPCaptureClause(StartLoc, EndLoc); break; + case OMPC_compare: + Res = ActOnOpenMPCompareClause(StartLoc, EndLoc); + break; case OMPC_seq_cst: Res = ActOnOpenMPSeqCstClause(StartLoc, EndLoc); break; @@ -15403,6 +15418,11 @@ OMPClause *Sema::ActOnOpenMPCaptureClause(SourceLocation StartLoc, return new (Context) OMPCaptureClause(StartLoc, EndLoc); } +OMPClause *Sema::ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (Context) OMPCompareClause(StartLoc, EndLoc); +} + OMPClause *Sema::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc) { return new (Context) OMPSeqCstClause(StartLoc, EndLoc); @@ -15871,6 +15891,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0607d3a774aa..7a038301a249 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6541,7 +6541,6 @@ static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr, StringRef BTFTypeTag = StrLiteral->getString(); Type = State.getAttributedType( ::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type, Type); - return; } /// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 39b659753dfa..298a3f7a83d8 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9460,6 +9460,13 @@ TreeTransform::TransformOMPCaptureClause(OMPCaptureClause *C) { return C; } +template <typename Derived> +OMPClause * +TreeTransform<Derived>::TransformOMPCompareClause(OMPCompareClause *C) { + // No need to rebuild this clause, no template-dependent parameters. 
+ return C; +} + template OMPClause * TreeTransform::TransformOMPSeqCstClause(OMPSeqCstClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index d16697b48ca9..f93e0d2ed1c4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11765,6 +11765,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_capture: C = new (Context) OMPCaptureClause(); break; + case llvm::omp::OMPC_compare: + C = new (Context) OMPCompareClause(); + break; case llvm::omp::OMPC_seq_cst: C = new (Context) OMPSeqCstClause(); break; @@ -12123,6 +12126,8 @@ void OMPClauseReader::VisitOMPUpdateClause(OMPUpdateClause *C) { void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {} +void OMPClauseReader::VisitOMPCompareClause(OMPCompareClause *) {} + void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3e4153b3b612..65a780e67510 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6252,6 +6252,8 @@ void OMPClauseWriter::VisitOMPUpdateClause(OMPUpdateClause *C) { void OMPClauseWriter::VisitOMPCaptureClause(OMPCaptureClause *) {} +void OMPClauseWriter::VisitOMPCompareClause(OMPCompareClause *) {} + void OMPClauseWriter::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseWriter::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h index 6a40f8eda5fa..b4352b450c7f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h @@ -25,8 +25,6 @@ bool isStdSmartPtrCall(const CallEvent &Call); bool isStdSmartPtr(const CXXRecordDecl *RD); bool isStdSmartPtr(const Expr *E); -bool isStdSmartPtr(const CXXRecordDecl *RD); - /// Returns whether the smart pointer is null or not. 
bool isNullSmartPtr(const ProgramStateRef State, const MemRegion *ThisRegion); diff --git a/clang/test/AST/ast-dump-decl-json.c b/clang/test/AST/ast-dump-decl-json.c index 9264590d67b8..c958a624c314 100644 --- a/clang/test/AST/ast-dump-decl-json.c +++ b/clang/test/AST/ast-dump-decl-json.c @@ -91,6 +91,7 @@ void testParmVarDecl(int TestParmVarDecl); // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TypedefDecl", // CHECK-NEXT: "loc": { @@ -137,6 +138,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -165,6 +167,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -195,6 +198,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -250,6 +254,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -360,6 +365,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "LabelDecl", // CHECK-NEXT: "loc": { @@ -385,6 +391,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "name": "TestLabelDecl" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TypedefDecl", // CHECK-NEXT: "loc": { @@ -421,6 +428,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumDecl", // CHECK-NEXT: "loc": { @@ -474,6 +482,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -583,6 +592,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumDecl", // CHECK-NEXT: "loc": { @@ -607,6 +617,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "name": "TestEnumDeclForward" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -662,6 +673,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -689,6 +701,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "completeDefinition": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -771,6 +784,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -851,6 +865,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -876,6 +891,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "tagUsed": "struct" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumConstantDecl", // CHECK-NEXT: "loc": { @@ -903,6 +919,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumConstantDecl", // CHECK-NEXT: "loc": { @@ -976,6 +993,7 @@ void 
testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -1108,6 +1126,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1159,7 +1178,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1185,7 +1203,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "y", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "enum (unnamed at {{.*}}:69:29)", // CHECK-NEXT: "qualType": "enum (unnamed enum at {{.*}}:69:29)" @@ -1283,6 +1300,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1332,7 +1350,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "enum Enum", // CHECK-NEXT: "qualType": "enum Enum" @@ -1454,6 +1471,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1502,7 +1520,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1510,6 +1527,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1538,6 +1556,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1567,6 +1586,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "storageClass": "extern" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1596,6 +1616,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "inline": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FieldDecl", // CHECK-NEXT: "loc": { @@ -1623,6 +1644,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FieldDecl", // CHECK-NEXT: "loc": { @@ -1697,6 +1719,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1725,6 +1748,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1754,6 +1778,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "storageClass": "extern" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1783,6 +1808,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "tls": "static" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1835,6 
+1861,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { @@ -1857,7 +1884,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "TestParmVarDecl", -// CHECK-NEXT: "mangledName": "TestParmVarDecl", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-decl-json.m b/clang/test/AST/ast-dump-decl-json.m index b970d73e906b..dbe20d8edc73 100644 --- a/clang/test/AST/ast-dump-decl-json.m +++ b/clang/test/AST/ast-dump-decl-json.m @@ -85,6 +85,7 @@ void f() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -121,6 +122,7 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCImplementationDecl", // CHECK-NEXT: "loc": { @@ -291,6 +293,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCMethodDecl", // CHECK-NEXT: "loc": { @@ -341,7 +344,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -349,6 +351,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCMethodDecl", // CHECK-NEXT: "loc": { @@ -389,7 +392,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "self", -// CHECK-NEXT: "mangledName": "_self", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "testObjCMethodDecl *" // CHECK-NEXT: } @@ -404,7 +406,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "_cmd", -// CHECK-NEXT: "mangledName": "__cmd", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -433,7 +434,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -500,6 +500,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCProtocolDecl", // CHECK-NEXT: "loc": { @@ -555,6 +556,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -628,6 +630,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCImplementationDecl", // CHECK-NEXT: "loc": { @@ -728,7 +731,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "self", -// CHECK-NEXT: "mangledName": "_self", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "TestObjCClass *" // CHECK-NEXT: } @@ -743,7 +745,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "_cmd", -// CHECK-NEXT: "mangledName": "__cmd", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -773,6 +774,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCCategoryDecl", // CHECK-NEXT: "loc": { @@ -845,6 +847,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -916,6 +919,7 @@ void f() { 
// CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCCategoryImplDecl", // CHECK-NEXT: "loc": { @@ -989,7 +993,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "self", -// CHECK-NEXT: "mangledName": "_self", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "TestObjCClass *" // CHECK-NEXT: } @@ -1004,7 +1007,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "_cmd", -// CHECK-NEXT: "mangledName": "__cmd", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -1034,6 +1036,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCCompatibleAliasDecl", // CHECK-NEXT: "loc": { @@ -1063,6 +1066,7 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -1246,7 +1250,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "foo", -// CHECK-NEXT: "mangledName": "_foo", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1331,7 +1334,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "bar", -// CHECK-NEXT: "mangledName": "_bar", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1341,6 +1343,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCImplementationDecl", // CHECK-NEXT: "loc": { @@ -1580,7 +1583,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "foo", -// CHECK-NEXT: "mangledName": "_foo", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1666,7 +1668,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "bar", -// CHECK-NEXT: "mangledName": "_bar", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1676,6 +1677,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1727,7 +1729,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1840,7 +1841,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "_y", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1946,7 +1946,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "_y", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1962,6 +1961,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1984,7 +1984,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "Test", -// CHECK-NEXT: "mangledName": "_Test", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int", // CHECK-NEXT: "qualType": "typeof (B.foo)" diff --git a/clang/test/AST/ast-dump-expr-json.c b/clang/test/AST/ast-dump-expr-json.c index c322625cb2b2..6409ed5fe216 100644 --- a/clang/test/AST/ast-dump-expr-json.c +++ b/clang/test/AST/ast-dump-expr-json.c @@ -109,6 +109,7 @@ void PrimaryExpressions(int a) { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=FunctionDecl + // CHECK-NOT: 
{{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -270,6 +271,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -320,7 +322,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -529,6 +530,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -579,7 +581,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -963,6 +964,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1013,7 +1015,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1040,7 +1041,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3335,6 +3335,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3386,7 +3387,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3413,7 +3413,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -4003,6 +4002,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4053,7 +4053,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -4080,7 +4079,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" @@ -4108,7 +4106,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "c", -// CHECK-NEXT: "mangledName": "c", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "struct S *" // CHECK-NEXT: } @@ -4900,6 +4897,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4950,7 +4948,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // 
CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp index 3ef2f78551b6..47787d5da667 100644 --- a/clang/test/AST/ast-dump-expr-json.cpp +++ b/clang/test/AST/ast-dump-expr-json.cpp @@ -155,6 +155,7 @@ void TestNonADLCall3() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -271,6 +272,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -322,7 +324,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "obj1", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4obj1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } @@ -349,7 +350,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "obj2", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4obj2", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S *" // CHECK-NEXT: } @@ -376,7 +376,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "data", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4data", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int S::*" // CHECK-NEXT: } @@ -403,7 +402,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "call", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4call", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "void (S::*)(int)" // CHECK-NEXT: } @@ -1018,6 +1016,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1069,7 +1068,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "s", -// CHECK-NEXT: "mangledName": "_ZZ11TestCastingPK1SE1s", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "const S *" // CHECK-NEXT: } @@ -1405,6 +1403,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -1508,7 +1507,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ20TestUnaryExpressionsPiE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -2483,6 +2481,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -2534,7 +2533,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ22TestPostfixExpressions1SPS_P1UIiEE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } @@ -2561,7 +2559,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ22TestPostfixExpressions1SPS_P1UIiEE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S *" // CHECK-NEXT: } @@ -2588,7 +2585,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "r", -// CHECK-NEXT: "mangledName": 
"_ZZ22TestPostfixExpressions1SPS_P1UIiEE1r", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "U *" // CHECK-NEXT: } @@ -3560,6 +3556,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -3663,7 +3660,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ22TestPrimaryExpressionsDpT_E1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Ts..." // CHECK-NEXT: }, @@ -4363,7 +4359,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ22TestPrimaryExpressionsDpT_E1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -4390,7 +4385,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "c", -// CHECK-NEXT: "mangledName": "_ZZ22TestPrimaryExpressionsDpT_E1c", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -4729,7 +4723,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZZ22TestPrimaryExpressionsDpT_ENKUlizE_clEizE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -4830,7 +4823,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZZ22TestPrimaryExpressionsDpT_ENUlizE_8__invokeEizE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -7759,6 +7751,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -7844,7 +7837,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ11TestADLCallvE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" @@ -8197,6 +8189,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -8282,7 +8275,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ14TestNonADLCallvE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" @@ -8478,6 +8470,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -8563,7 +8556,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ15TestNonADLCall2vE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" @@ -8961,6 +8953,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -9046,7 +9039,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZN19test_adl_call_three15TestNonADLCall3EvE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-expr-json.m 
b/clang/test/AST/ast-dump-expr-json.m index 062f6c06d1aa..cc63b1ffd249 100644 --- a/clang/test/AST/ast-dump-expr-json.m +++ b/clang/test/AST/ast-dump-expr-json.m @@ -97,6 +97,7 @@ void TestObjCBoolLiteral() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -243,6 +244,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -294,7 +296,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", -// CHECK-NEXT: "mangledName": "Obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "I *" // CHECK-NEXT: } @@ -423,6 +424,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -585,6 +587,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -669,7 +672,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "s", -// CHECK-NEXT: "mangledName": "s", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -709,6 +711,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -760,7 +763,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", -// CHECK-NEXT: "mangledName": "Obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", @@ -896,6 +898,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -947,7 +950,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", -// CHECK-NEXT: "mangledName": "Obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "J *" // CHECK-NEXT: } @@ -1587,7 +1589,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -1919,6 +1920,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1970,7 +1972,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Array", -// CHECK-NEXT: "mangledName": "Array", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "NSMutableArray *" // CHECK-NEXT: } @@ -1997,7 +1998,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Dict", -// CHECK-NEXT: "mangledName": "Dict", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "NSMutableDictionary *" // CHECK-NEXT: } @@ -2863,7 +2863,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "i", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", @@ -4767,6 +4766,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", 
// CHECK-NEXT: "loc": { @@ -4818,7 +4818,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Ptr", -// CHECK-NEXT: "mangledName": "Ptr", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "I *" // CHECK-NEXT: } @@ -4972,6 +4971,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-file-line-json.c b/clang/test/AST/ast-dump-file-line-json.c index d4899d8fbbe9..b42107d415b7 100644 --- a/clang/test/AST/ast-dump-file-line-json.c +++ b/clang/test/AST/ast-dump-file-line-json.c @@ -12,6 +12,7 @@ int d; int e; // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TranslationUnitDecl", // CHECK-NEXT: "loc": {}, @@ -171,7 +172,7 @@ int e; // CHECK-NEXT: "offset": 105, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 4, -// CHECK-NEXT: "presumedFile": "test.c", +// CHECK-NEXT: "presumedFile": "{{.*}}", // CHECK-NEXT: "col": 5, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, @@ -256,7 +257,7 @@ int e; // CHECK-NEXT: "loc": { // CHECK-NEXT: "offset": 163, // CHECK-NEXT: "line": 11, -// CHECK-NEXT: "presumedFile": "test.c", +// CHECK-NEXT: "presumedFile": "{{.*}}", // CHECK-NEXT: "col": 5, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-funcs-json.cpp b/clang/test/AST/ast-dump-funcs-json.cpp index aeaf3540a987..45216274d5df 100644 --- a/clang/test/AST/ast-dump-funcs-json.cpp +++ b/clang/test/AST/ast-dump-funcs-json.cpp @@ -43,6 +43,7 @@ int main() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -71,6 +72,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -99,6 +101,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -127,6 +130,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -155,6 +159,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -183,6 +188,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -286,6 +292,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -316,6 +323,7 @@ int main() { // CHECK-NEXT: "pure": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -434,6 +442,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -462,6 +471,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -510,6 +520,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -539,6 +550,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -567,6 +579,7 @@ int 
main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -615,7 +628,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ5Test3iiE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -641,7 +653,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ5Test3iiE1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -649,6 +660,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -697,7 +709,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ5Test4iiE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -723,7 +734,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ5Test4iiE1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -755,6 +765,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -784,6 +795,7 @@ int main() { // CHECK-NEXT: "constexpr": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -813,6 +825,7 @@ int main() { // CHECK-NEXT: "storageClass": "static" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -842,6 +855,7 @@ int main() { // CHECK-NEXT: "storageClass": "extern" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -871,6 +885,7 @@ int main() { // CHECK-NEXT: "inline": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -899,6 +914,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -927,6 +943,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -955,6 +972,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -1062,6 +1080,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1132,6 +1151,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-if-json.cpp b/clang/test/AST/ast-dump-if-json.cpp index 6c8065b54e33..3f3817b88ba4 100644 --- a/clang/test/AST/ast-dump-if-json.cpp +++ b/clang/test/AST/ast-dump-if-json.cpp @@ -711,7 +711,6 @@ void func(int val) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ4funciE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -894,7 +893,6 @@ void func(int val) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ4funciE1i_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-stmt-json.c 
b/clang/test/AST/ast-dump-stmt-json.c index fe3ec558112a..899bdc8e8580 100644 --- a/clang/test/AST/ast-dump-stmt-json.c +++ b/clang/test/AST/ast-dump-stmt-json.c @@ -147,9 +147,14 @@ void TestLineNumbers(void) { #undef FOO } +void TestVLA(int n) { + double vla[n]; +} + // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=VarDecl,CompoundStmt + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -202,6 +207,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -319,6 +325,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -374,7 +381,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -443,7 +449,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "y", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -469,7 +474,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "z", -// CHECK-NEXT: "mangledName": "z", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -479,6 +483,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -659,6 +664,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -715,7 +721,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "T1", -// CHECK-NEXT: "mangledName": "T1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "char" // CHECK-NEXT: }, @@ -808,7 +813,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "T2", -// CHECK-NEXT: "mangledName": "T2", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -1302,6 +1306,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -1931,7 +1936,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "j", -// CHECK-NEXT: "mangledName": "j", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -2059,6 +2063,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -2214,7 +2219,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "ptr", -// CHECK-NEXT: "mangledName": "ptr", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "void *" // CHECK-NEXT: }, @@ -2342,6 +2346,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -2839,6 +2844,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -3622,6 +3628,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // 
CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -3873,7 +3880,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -4830,6 +4836,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -5075,6 +5082,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -5167,7 +5175,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5256,6 +5263,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -5312,7 +5320,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -5358,7 +5365,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -5406,3 +5412,69 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: ] // CHECK-NEXT: } + + +// CHECK-NOT: {{^}}Dumping +// CHECK: "kind": "CompoundStmt", +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 1683, +// CHECK-NEXT: "col": 21, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 1702, +// CHECK-NEXT: "line": 152, +// CHECK-NEXT: "presumedLine": 200007, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "DeclStmt", +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 1687, +// CHECK-NEXT: "line": 151, +// CHECK-NEXT: "presumedLine": 200006, +// CHECK-NEXT: "col": 3, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 1700, +// CHECK-NEXT: "col": 16, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 1694, +// CHECK-NEXT: "col": 10, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 1687, +// CHECK-NEXT: "col": 3, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 1699, +// CHECK-NEXT: "col": 15, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "vla", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "double[n]" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-stmt-json.cpp b/clang/test/AST/ast-dump-stmt-json.cpp index b8988cc16d43..5284553448a5 100644 --- a/clang/test/AST/ast-dump-stmt-json.cpp +++ b/clang/test/AST/ast-dump-stmt-json.cpp @@ 
-125,6 +125,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=FunctionDecl,FunctionTemplateDecl,UsingDecl,UsingShadowDecl + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -171,6 +172,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingDecl", // CHECK-NEXT: "loc": { @@ -194,6 +196,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "name": "n::function" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingShadowDecl", // CHECK-NEXT: "loc": { @@ -224,6 +227,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingDecl", // CHECK-NEXT: "loc": { @@ -247,6 +251,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "name": "n::Variable" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingShadowDecl", // CHECK-NEXT: "loc": { @@ -277,6 +282,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -360,7 +366,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "f", -// CHECK-NEXT: "mangledName": "_ZZ12TestFunctionvE1f", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "void (*)()" // CHECK-NEXT: }, @@ -511,6 +516,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -632,7 +638,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ10TestCatch1vE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -663,6 +668,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -792,6 +798,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -876,7 +883,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ19TestAllocationExprsvE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -1425,6 +1431,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1483,6 +1490,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1562,6 +1570,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1620,6 +1629,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1699,6 +1709,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: 
{{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1739,6 +1750,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1791,6 +1803,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1831,6 +1844,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1883,6 +1897,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2018,7 +2033,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ27TestDependentAllocationExprvE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "T *" // CHECK-NEXT: }, @@ -2106,6 +2120,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2241,7 +2256,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "obj", -// CHECK-NEXT: "mangledName": "_ZZ28TestDependentScopeMemberExprvE3obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: } @@ -2613,6 +2627,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2748,7 +2763,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "obj", -// CHECK-NEXT: "mangledName": "_ZZ36TestDependentScopeTemplateMemberExprvE3obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "OtherDependentScopeMemberExprWrapper" // CHECK-NEXT: } @@ -2880,6 +2894,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -2963,7 +2978,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "us", -// CHECK-NEXT: "mangledName": "_ZZ17TestUnionInitListvE2us", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "U[3]" // CHECK-NEXT: }, @@ -3079,6 +3093,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3129,7 +3144,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ10TestSwitchiE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3208,7 +3222,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ10TestSwitchiE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3290,6 +3303,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": 
"FunctionDecl", // CHECK-NEXT: "loc": { @@ -3340,7 +3354,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ6TestIfbE1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "bool" // CHECK-NEXT: } @@ -3420,7 +3433,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ6TestIfbE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "const int" // CHECK-NEXT: }, @@ -3974,6 +3986,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4076,7 +4089,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -4145,7 +4157,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "j", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1j", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -4390,7 +4401,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "vals", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE4vals", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int[10]" // CHECK-NEXT: } @@ -4456,7 +4466,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__range1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__range1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int (&)[10]" // CHECK-NEXT: }, @@ -4533,7 +4542,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__begin1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -4634,7 +4642,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE6__end1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -4963,7 +4970,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "v", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1v", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5133,7 +5139,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "C", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1C", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, @@ -5227,7 +5232,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__range1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__range1_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Container &" // CHECK-NEXT: }, @@ -5304,7 +5308,6 @@ void 
TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__begin1_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -5452,7 +5455,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE6__end1_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -5784,7 +5786,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "v", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1v_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5971,7 +5972,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -6017,7 +6017,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__range1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__range1_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int (&)[10]" // CHECK-NEXT: }, @@ -6094,7 +6093,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__begin1_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -6195,7 +6193,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE6__end1_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -6524,7 +6521,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "v", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1v_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -6660,6 +6656,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -6761,7 +6758,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "T", -// CHECK-NEXT: "mangledName": "_ZZ33TestDependentGenericSelectionExprT_E1T", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Ty" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-stmt-json.m b/clang/test/AST/ast-dump-stmt-json.m index 44935d832674..c1898bd13d65 100644 --- a/clang/test/AST/ast-dump-stmt-json.m +++ b/clang/test/AST/ast-dump-stmt-json.m @@ -21,6 +21,7 @@ void TestObjCAtCatchStmt() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -72,7 +73,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { 
// CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -254,6 +254,7 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -305,7 +306,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -487,6 +487,7 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -608,7 +609,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "A *" // CHECK-NEXT: } @@ -755,7 +755,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "__context", -// CHECK-NEXT: "mangledName": "__context", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "struct (unnamed at {{.*}}:18:14) *" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-template-decls-json.cpp b/clang/test/AST/ast-dump-template-decls-json.cpp index 5e7a547e2161..fc1b883f5dac 100644 --- a/clang/test/AST/ast-dump-template-decls-json.cpp +++ b/clang/test/AST/ast-dump-template-decls-json.cpp @@ -60,6 +60,7 @@ void i(); // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TranslationUnitDecl", // CHECK-NEXT: "loc": {}, @@ -1039,7 +1040,6 @@ void i(); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ1fiE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, diff --git a/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm b/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm index 61f13ce01925..9fa5120f8842 100644 --- a/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm +++ b/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm @@ -1,8 +1,8 @@ // Check that the compiler wouldn't crash due to inconsistent namesapce linkage // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: %clang -std=c++20 %S/Inputs/p2.cppm --precompile -o %t/Y.pcm -// RUN: %clang -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs %s -c -Xclang -verify +// RUN: %clang_cc1 -x c++ -std=c++20 %S/Inputs/p2.cppm -emit-module-interface -o %t/Y.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs %s -fsyntax-only -verify // expected-no-diagnostics export module X; import Y; diff --git a/clang/test/CodeGen/aggregate-assign-call.c b/clang/test/CodeGen/aggregate-assign-call.c index 5aeaca19379a..d25fc39cfe75 100644 --- a/clang/test/CodeGen/aggregate-assign-call.c +++ b/clang/test/CodeGen/aggregate-assign-call.c @@ -4,7 +4,8 @@ // Ensure that we place appropriate lifetime markers around indirectly returned // temporaries, and that the lifetime.ends appear in a timely manner. // -// -O1 is used so lifetime markers actually get emitted. +// -O1 is used so lifetime markers actually get emitted and optnone is added +// to avoid elimination of lifetime markers by optimizations. 
struct S { int ns[40]; @@ -13,25 +14,39 @@ struct S { struct S foo(void); // CHECK-LABEL: define dso_local void @bar +__attribute__((optnone)) struct S bar() { // O0-NOT: @llvm.lifetime.start // O0-NOT: @llvm.lifetime.end struct S r; - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP1:[^)]+]]) + // O1: %[[TMP1_ALLOCA:[^ ]+]] = alloca %struct.S + // O1: %[[TMP2_ALLOCA:[^ ]+]] = alloca %struct.S + // O1: %[[TMP3_ALLOCA:[^ ]+]] = alloca %struct.S + + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: call void @foo r = foo(); - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP1]]) + // O1: memcpy + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP2:[^)]+]]) + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: call void @foo r = foo(); - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP2]]) + // O1: memcpy + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP3:[^)]+]]) + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP3_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: call void @foo r = foo(); - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP3]]) + // O1: memcpy + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP3_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) return r; } @@ -39,8 +54,6 @@ struct S bar() { struct S foo_int(int); // Be sure that we're placing the lifetime.end so that all paths go through it. -// Since this function turns out to be large-ish, optnone to hopefully keep it -// stable. 
// CHECK-LABEL: define dso_local void @baz __attribute__((optnone)) struct S baz(int i, volatile int *j) { diff --git a/clang/test/CodeGen/builtins-hexagon-v66-128B.c b/clang/test/CodeGen/builtins-hexagon-v66-128B.c index 074728ec07ec..5839d3ae975e 100644 --- a/clang/test/CodeGen/builtins-hexagon-v66-128B.c +++ b/clang/test/CodeGen/builtins-hexagon-v66-128B.c @@ -20,7 +20,7 @@ HEXAGON_Vect1024 test1(void *in, void *out) { v2 = *p++; q1 = *p++; - return __builtin_HEXAGON_V6_vaddcarrysat_128B(v1, v2, q1); + return __builtin_HEXAGON_V6_vaddcarrysat_128B(v1, v2, __builtin_HEXAGON_V6_vandvrt_128B(q1, -1)); } // CHECK-LABEL: @test26 diff --git a/clang/test/CodeGen/builtins-hexagon-v66.c b/clang/test/CodeGen/builtins-hexagon-v66.c index 767f9faf7702..a222228dd683 100644 --- a/clang/test/CodeGen/builtins-hexagon-v66.c +++ b/clang/test/CodeGen/builtins-hexagon-v66.c @@ -44,7 +44,7 @@ HEXAGON_Vect512 test5(void *in, void *out) { v2 = *p++; q1 = *p++; - return __builtin_HEXAGON_V6_vaddcarrysat(v1, v2, q1); + return __builtin_HEXAGON_V6_vaddcarrysat(v1, v2, __builtin_HEXAGON_V6_vandvrt(q1, -1)); } // CHECK-LABEL: @test6 diff --git a/clang/test/CodeGen/builtins-hvx128.c b/clang/test/CodeGen/builtins-hvx128.c index d61afdefc2ae..226f6c0792a7 100644 --- a/clang/test/CodeGen/builtins-hvx128.c +++ b/clang/test/CodeGen/builtins-hvx128.c @@ -6,6 +6,17 @@ void test() { int v128 __attribute__((__vector_size__(128))); int v256 __attribute__((__vector_size__(256))); + // These are special and ugly: they take an HVX vector in place of + // the HVX vector predicate. + // CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B + __builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B + __builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B + __builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B + __builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.extractw.128B __builtin_HEXAGON_V6_extractw_128B(v128, 0); // CHECK: @llvm.hexagon.V6.hi.128B @@ -19,33 +30,33 @@ void test() { // CHECK: @llvm.hexagon.V6.lvsplatw.128B __builtin_HEXAGON_V6_lvsplatw_128B(0); // CHECK: @llvm.hexagon.V6.pred.and.128B - __builtin_HEXAGON_V6_pred_and_128B(q128, q128); + __builtin_HEXAGON_V6_pred_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.and.n.128B - __builtin_HEXAGON_V6_pred_and_n_128B(q128, q128); + __builtin_HEXAGON_V6_pred_and_n_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.not.128B - __builtin_HEXAGON_V6_pred_not_128B(q128); + __builtin_HEXAGON_V6_pred_not_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.or.128B - __builtin_HEXAGON_V6_pred_or_128B(q128, q128); + __builtin_HEXAGON_V6_pred_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.or.n.128B - __builtin_HEXAGON_V6_pred_or_n_128B(q128, q128); + __builtin_HEXAGON_V6_pred_or_n_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.scalar2.128B __builtin_HEXAGON_V6_pred_scalar2_128B(0); // CHECK: @llvm.hexagon.V6.pred.scalar2v2.128B __builtin_HEXAGON_V6_pred_scalar2v2_128B(0); // CHECK: @llvm.hexagon.V6.pred.xor.128B - __builtin_HEXAGON_V6_pred_xor_128B(q128, 
q128); + __builtin_HEXAGON_V6_pred_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.shuffeqh.128B - __builtin_HEXAGON_V6_shuffeqh_128B(q128, q128); + __builtin_HEXAGON_V6_shuffeqh_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.shuffeqw.128B - __builtin_HEXAGON_V6_shuffeqw_128B(q128, q128); + __builtin_HEXAGON_V6_shuffeqw_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai.128B - __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B - __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B - __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai.128B - __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vabsb.128B __builtin_HEXAGON_V6_vabsb_128B(v128); // CHECK: @llvm.hexagon.V6.vabsb.sat.128B @@ -71,9 +82,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddb.dv.128B __builtin_HEXAGON_V6_vaddb_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vaddbnq.128B - __builtin_HEXAGON_V6_vaddbnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddbnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddbq.128B - __builtin_HEXAGON_V6_vaddbq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddbq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddbsat.128B __builtin_HEXAGON_V6_vaddbsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vaddbsat.dv.128B @@ -89,9 +100,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddh.dv.128B __builtin_HEXAGON_V6_vaddh_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vaddhnq.128B - __builtin_HEXAGON_V6_vaddhnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddhnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddhq.128B - __builtin_HEXAGON_V6_vaddhq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddhq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddhsat.128B __builtin_HEXAGON_V6_vaddhsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B @@ -127,9 +138,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddw.dv.128B __builtin_HEXAGON_V6_vaddw_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vaddwnq.128B - __builtin_HEXAGON_V6_vaddwnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddwnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddwq.128B - __builtin_HEXAGON_V6_vaddwq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddwsat.128B __builtin_HEXAGON_V6_vaddwsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B @@ -141,21 +152,21 @@ void test() { // CHECK: @llvm.hexagon.V6.vand.128B __builtin_HEXAGON_V6_vand_128B(v128, v128); // CHECK: 
@llvm.hexagon.V6.vandnqrt.128B - __builtin_HEXAGON_V6_vandnqrt_128B(q128, 0); + __builtin_HEXAGON_V6_vandnqrt_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandnqrt.acc.128B - __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, q128, 0); + __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt.128B - __builtin_HEXAGON_V6_vandqrt_128B(q128, 0); + __builtin_HEXAGON_V6_vandqrt_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt.acc.128B - __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, q128, 0); + __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandvnqv.128B - __builtin_HEXAGON_V6_vandvnqv_128B(q128, v128); + __builtin_HEXAGON_V6_vandvnqv_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128); // CHECK: @llvm.hexagon.V6.vandvqv.128B - __builtin_HEXAGON_V6_vandvqv_128B(q128, v128); + __builtin_HEXAGON_V6_vandvqv_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128); // CHECK: @llvm.hexagon.V6.vandvrt.128B __builtin_HEXAGON_V6_vandvrt_128B(v128, 0); // CHECK: @llvm.hexagon.V6.vandvrt.acc.128B - __builtin_HEXAGON_V6_vandvrt_acc_128B(q128, v128, 0); + __builtin_HEXAGON_V6_vandvrt_acc_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, 0); // CHECK: @llvm.hexagon.V6.vaslh.128B __builtin_HEXAGON_V6_vaslh_128B(v128, 0); // CHECK: @llvm.hexagon.V6.vaslh.acc.128B @@ -297,87 +308,87 @@ void test() { // CHECK: @llvm.hexagon.V6.veqb.128B __builtin_HEXAGON_V6_veqb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.veqb.and.128B - __builtin_HEXAGON_V6_veqb_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqb_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqb.or.128B - __builtin_HEXAGON_V6_veqb_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqb_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqb.xor.128B - __builtin_HEXAGON_V6_veqb_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqb_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqh.128B __builtin_HEXAGON_V6_veqh_128B(v128, v128); // CHECK: @llvm.hexagon.V6.veqh.and.128B - __builtin_HEXAGON_V6_veqh_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqh_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqh.or.128B - __builtin_HEXAGON_V6_veqh_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqh_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqh.xor.128B - __builtin_HEXAGON_V6_veqh_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqh_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqw.128B __builtin_HEXAGON_V6_veqw_128B(v128, v128); // CHECK: @llvm.hexagon.V6.veqw.and.128B - __builtin_HEXAGON_V6_veqw_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqw_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqw.or.128B - __builtin_HEXAGON_V6_veqw_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqw_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqw.xor.128B - __builtin_HEXAGON_V6_veqw_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqw_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgathermh.128B 
__builtin_HEXAGON_V6_vgathermh_128B(0, 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermhq.128B - __builtin_HEXAGON_V6_vgathermhq_128B(0, q128, 0, 0, v128); + __builtin_HEXAGON_V6_vgathermhq_128B(0, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermhw.128B __builtin_HEXAGON_V6_vgathermhw_128B(0, 0, 0, v256); // CHECK: @llvm.hexagon.V6.vgathermhwq.128B - __builtin_HEXAGON_V6_vgathermhwq_128B(0, q128, 0, 0, v256); + __builtin_HEXAGON_V6_vgathermhwq_128B(0, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v256); // CHECK: @llvm.hexagon.V6.vgathermw.128B __builtin_HEXAGON_V6_vgathermw_128B(0, 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermwq.128B - __builtin_HEXAGON_V6_vgathermwq_128B(0, q128, 0, 0, v128); + __builtin_HEXAGON_V6_vgathermwq_128B(0, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgtb.128B __builtin_HEXAGON_V6_vgtb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtb.and.128B - __builtin_HEXAGON_V6_vgtb_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtb_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtb.or.128B - __builtin_HEXAGON_V6_vgtb_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtb_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtb.xor.128B - __builtin_HEXAGON_V6_vgtb_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtb_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgth.128B __builtin_HEXAGON_V6_vgth_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgth.and.128B - __builtin_HEXAGON_V6_vgth_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgth_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgth.or.128B - __builtin_HEXAGON_V6_vgth_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgth_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgth.xor.128B - __builtin_HEXAGON_V6_vgth_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgth_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.128B __builtin_HEXAGON_V6_vgtub_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.and.128B - __builtin_HEXAGON_V6_vgtub_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtub_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.or.128B - __builtin_HEXAGON_V6_vgtub_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtub_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.xor.128B - __builtin_HEXAGON_V6_vgtub_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtub_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.128B __builtin_HEXAGON_V6_vgtuh_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.and.128B - __builtin_HEXAGON_V6_vgtuh_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuh_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.or.128B - __builtin_HEXAGON_V6_vgtuh_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuh_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.xor.128B - __builtin_HEXAGON_V6_vgtuh_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuh_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.128B 
__builtin_HEXAGON_V6_vgtuw_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.and.128B - __builtin_HEXAGON_V6_vgtuw_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuw_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.or.128B - __builtin_HEXAGON_V6_vgtuw_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuw_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.xor.128B - __builtin_HEXAGON_V6_vgtuw_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuw_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.128B __builtin_HEXAGON_V6_vgtw_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.and.128B - __builtin_HEXAGON_V6_vgtw_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtw_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.or.128B - __builtin_HEXAGON_V6_vgtw_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtw_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.xor.128B - __builtin_HEXAGON_V6_vgtw_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtw_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vinsertwr.128B __builtin_HEXAGON_V6_vinsertwr_128B(v128, 0); // CHECK: @llvm.hexagon.V6.vlalignb.128B @@ -416,14 +427,6 @@ void test() { __builtin_HEXAGON_V6_vlutvwh_oracci_128B(v256, v128, v128, 0); // CHECK: @llvm.hexagon.V6.vlutvwhi.128B __builtin_HEXAGON_V6_vlutvwhi_128B(v128, v128, 0); - // CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B - __builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128); - // CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B - __builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128); - // CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B - __builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128); - // CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B - __builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128); // CHECK: @llvm.hexagon.V6.vmaxb.128B __builtin_HEXAGON_V6_vmaxb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vmaxh.128B @@ -567,7 +570,7 @@ void test() { // CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B __builtin_HEXAGON_V6_vmpyuhv_acc_128B(v256, v128, v128); // CHECK: @llvm.hexagon.V6.vmux.128B - __builtin_HEXAGON_V6_vmux_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vmux_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vnavgb.128B __builtin_HEXAGON_V6_vnavgb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vnavgh.128B @@ -603,11 +606,11 @@ void test() { // CHECK: @llvm.hexagon.V6.vpopcounth.128B __builtin_HEXAGON_V6_vpopcounth_128B(v128); // CHECK: @llvm.hexagon.V6.vprefixqb.128B - __builtin_HEXAGON_V6_vprefixqb_128B(q128); + __builtin_HEXAGON_V6_vprefixqb_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vprefixqh.128B - __builtin_HEXAGON_V6_vprefixqh_128B(q128); + __builtin_HEXAGON_V6_vprefixqh_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vprefixqw.128B - __builtin_HEXAGON_V6_vprefixqw_128B(q128); + __builtin_HEXAGON_V6_vprefixqw_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vrdelta.128B __builtin_HEXAGON_V6_vrdelta_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vrmpybub.rtt.128B @@ -677,19 +680,19 @@ void test() { // CHECK: @llvm.hexagon.V6.vscattermh.add.128B __builtin_HEXAGON_V6_vscattermh_add_128B(0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermhq.128B - 
__builtin_HEXAGON_V6_vscattermhq_128B(q128, 0, 0, v128, v128); + __builtin_HEXAGON_V6_vscattermhq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermhw.128B __builtin_HEXAGON_V6_vscattermhw_128B(0, 0, v256, v128); // CHECK: @llvm.hexagon.V6.vscattermhw.add.128B __builtin_HEXAGON_V6_vscattermhw_add_128B(0, 0, v256, v128); // CHECK: @llvm.hexagon.V6.vscattermhwq.128B - __builtin_HEXAGON_V6_vscattermhwq_128B(q128, 0, 0, v256, v128); + __builtin_HEXAGON_V6_vscattermhwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v256, v128); // CHECK: @llvm.hexagon.V6.vscattermw.128B __builtin_HEXAGON_V6_vscattermw_128B(0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermw.add.128B __builtin_HEXAGON_V6_vscattermw_add_128B(0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermwq.128B - __builtin_HEXAGON_V6_vscattermwq_128B(q128, 0, 0, v128, v128); + __builtin_HEXAGON_V6_vscattermwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vsh.128B __builtin_HEXAGON_V6_vsh_128B(v128); // CHECK: @llvm.hexagon.V6.vshufeh.128B @@ -715,9 +718,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubb.dv.128B __builtin_HEXAGON_V6_vsubb_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vsubbnq.128B - __builtin_HEXAGON_V6_vsubbnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubbnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubbq.128B - __builtin_HEXAGON_V6_vsubbq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubbq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubbsat.128B __builtin_HEXAGON_V6_vsubbsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vsubbsat.dv.128B @@ -729,9 +732,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubh.dv.128B __builtin_HEXAGON_V6_vsubh_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vsubhnq.128B - __builtin_HEXAGON_V6_vsubhnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubhnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubhq.128B - __builtin_HEXAGON_V6_vsubhq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubhq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubhsat.128B __builtin_HEXAGON_V6_vsubhsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B @@ -761,15 +764,15 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubw.dv.128B __builtin_HEXAGON_V6_vsubw_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vsubwnq.128B - __builtin_HEXAGON_V6_vsubwnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubwnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubwq.128B - __builtin_HEXAGON_V6_vsubwq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubwsat.128B __builtin_HEXAGON_V6_vsubwsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B __builtin_HEXAGON_V6_vsubwsat_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vswap.128B - __builtin_HEXAGON_V6_vswap_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vswap_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vtmpyb.128B __builtin_HEXAGON_V6_vtmpyb_128B(v256, 0); // CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B diff --git a/clang/test/CodeGen/builtins-hvx64.c b/clang/test/CodeGen/builtins-hvx64.c index 27d39990adb5..7321460c489e 100644 --- a/clang/test/CodeGen/builtins-hvx64.c +++ 
b/clang/test/CodeGen/builtins-hvx64.c @@ -6,6 +6,17 @@ void test() { int v64 __attribute__((__vector_size__(64))); int v128 __attribute__((__vector_size__(128))); + // These are special and ugly: they take an HVX vector in place of + // the HVX vector predicate. + // CHECK: @llvm.hexagon.V6.vmaskedstorenq + __builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.vmaskedstorentnq + __builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.vmaskedstorentq + __builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.vmaskedstoreq + __builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.extractw __builtin_HEXAGON_V6_extractw(v64, 0); // CHECK: @llvm.hexagon.V6.hi @@ -19,33 +30,33 @@ void test() { // CHECK: @llvm.hexagon.V6.lvsplatw __builtin_HEXAGON_V6_lvsplatw(0); // CHECK: @llvm.hexagon.V6.pred.and - __builtin_HEXAGON_V6_pred_and(q64, q64); + __builtin_HEXAGON_V6_pred_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.and.n - __builtin_HEXAGON_V6_pred_and_n(q64, q64); + __builtin_HEXAGON_V6_pred_and_n(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.not - __builtin_HEXAGON_V6_pred_not(q64); + __builtin_HEXAGON_V6_pred_not(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.or - __builtin_HEXAGON_V6_pred_or(q64, q64); + __builtin_HEXAGON_V6_pred_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.or.n - __builtin_HEXAGON_V6_pred_or_n(q64, q64); + __builtin_HEXAGON_V6_pred_or_n(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.scalar2 __builtin_HEXAGON_V6_pred_scalar2(0); // CHECK: @llvm.hexagon.V6.pred.scalar2v2 __builtin_HEXAGON_V6_pred_scalar2v2(0); // CHECK: @llvm.hexagon.V6.pred.xor - __builtin_HEXAGON_V6_pred_xor(q64, q64); + __builtin_HEXAGON_V6_pred_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.shuffeqh - __builtin_HEXAGON_V6_shuffeqh(q64, q64); + __builtin_HEXAGON_V6_shuffeqh(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.shuffeqw - __builtin_HEXAGON_V6_shuffeqw(q64, q64); + __builtin_HEXAGON_V6_shuffeqw(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai - __builtin_HEXAGON_V6_vS32b_nqpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_nqpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai - __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai - __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai - __builtin_HEXAGON_V6_vS32b_qpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_qpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vabsb __builtin_HEXAGON_V6_vabsb(v64); // CHECK: @llvm.hexagon.V6.vabsb.sat @@ -71,9 +82,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddb.dv __builtin_HEXAGON_V6_vaddb_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vaddbnq - __builtin_HEXAGON_V6_vaddbnq(q64, v64, v64); + 
__builtin_HEXAGON_V6_vaddbnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddbq - __builtin_HEXAGON_V6_vaddbq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddbq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddbsat __builtin_HEXAGON_V6_vaddbsat(v64, v64); // CHECK: @llvm.hexagon.V6.vaddbsat.dv @@ -89,9 +100,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddh.dv __builtin_HEXAGON_V6_vaddh_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vaddhnq - __builtin_HEXAGON_V6_vaddhnq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddhnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddhq - __builtin_HEXAGON_V6_vaddhq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddhq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddhsat __builtin_HEXAGON_V6_vaddhsat(v64, v64); // CHECK: @llvm.hexagon.V6.vaddhsat.dv @@ -127,9 +138,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddw.dv __builtin_HEXAGON_V6_vaddw_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vaddwnq - __builtin_HEXAGON_V6_vaddwnq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddwnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddwq - __builtin_HEXAGON_V6_vaddwq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddwsat __builtin_HEXAGON_V6_vaddwsat(v64, v64); // CHECK: @llvm.hexagon.V6.vaddwsat.dv @@ -141,21 +152,21 @@ void test() { // CHECK: @llvm.hexagon.V6.vand __builtin_HEXAGON_V6_vand(v64, v64); // CHECK: @llvm.hexagon.V6.vandnqrt - __builtin_HEXAGON_V6_vandnqrt(q64, 0); + __builtin_HEXAGON_V6_vandnqrt(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandnqrt.acc - __builtin_HEXAGON_V6_vandnqrt_acc(v64, q64, 0); + __builtin_HEXAGON_V6_vandnqrt_acc(v64, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt - __builtin_HEXAGON_V6_vandqrt(q64, 0); + __builtin_HEXAGON_V6_vandqrt(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt.acc - __builtin_HEXAGON_V6_vandqrt_acc(v64, q64, 0); + __builtin_HEXAGON_V6_vandqrt_acc(v64, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandvnqv - __builtin_HEXAGON_V6_vandvnqv(q64, v64); + __builtin_HEXAGON_V6_vandvnqv(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64); // CHECK: @llvm.hexagon.V6.vandvqv - __builtin_HEXAGON_V6_vandvqv(q64, v64); + __builtin_HEXAGON_V6_vandvqv(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64); // CHECK: @llvm.hexagon.V6.vandvrt __builtin_HEXAGON_V6_vandvrt(v64, 0); // CHECK: @llvm.hexagon.V6.vandvrt.acc - __builtin_HEXAGON_V6_vandvrt_acc(q64, v64, 0); + __builtin_HEXAGON_V6_vandvrt_acc(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, 0); // CHECK: @llvm.hexagon.V6.vaslh __builtin_HEXAGON_V6_vaslh(v64, 0); // CHECK: @llvm.hexagon.V6.vaslh.acc @@ -297,87 +308,87 @@ void test() { // CHECK: @llvm.hexagon.V6.veqb __builtin_HEXAGON_V6_veqb(v64, v64); // CHECK: @llvm.hexagon.V6.veqb.and - __builtin_HEXAGON_V6_veqb_and(q64, v64, v64); + __builtin_HEXAGON_V6_veqb_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqb.or - __builtin_HEXAGON_V6_veqb_or(q64, v64, v64); + __builtin_HEXAGON_V6_veqb_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqb.xor - __builtin_HEXAGON_V6_veqb_xor(q64, v64, v64); + __builtin_HEXAGON_V6_veqb_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqh __builtin_HEXAGON_V6_veqh(v64, v64); // CHECK: 
@llvm.hexagon.V6.veqh.and - __builtin_HEXAGON_V6_veqh_and(q64, v64, v64); + __builtin_HEXAGON_V6_veqh_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqh.or - __builtin_HEXAGON_V6_veqh_or(q64, v64, v64); + __builtin_HEXAGON_V6_veqh_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqh.xor - __builtin_HEXAGON_V6_veqh_xor(q64, v64, v64); + __builtin_HEXAGON_V6_veqh_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqw __builtin_HEXAGON_V6_veqw(v64, v64); // CHECK: @llvm.hexagon.V6.veqw.and - __builtin_HEXAGON_V6_veqw_and(q64, v64, v64); + __builtin_HEXAGON_V6_veqw_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqw.or - __builtin_HEXAGON_V6_veqw_or(q64, v64, v64); + __builtin_HEXAGON_V6_veqw_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqw.xor - __builtin_HEXAGON_V6_veqw_xor(q64, v64, v64); + __builtin_HEXAGON_V6_veqw_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgathermh __builtin_HEXAGON_V6_vgathermh(0, 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgathermhq - __builtin_HEXAGON_V6_vgathermhq(0, q64, 0, 0, v64); + __builtin_HEXAGON_V6_vgathermhq(0, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgathermhw __builtin_HEXAGON_V6_vgathermhw(0, 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermhwq - __builtin_HEXAGON_V6_vgathermhwq(0, q64, 0, 0, v128); + __builtin_HEXAGON_V6_vgathermhwq(0, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermw __builtin_HEXAGON_V6_vgathermw(0, 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgathermwq - __builtin_HEXAGON_V6_vgathermwq(0, q64, 0, 0, v64); + __builtin_HEXAGON_V6_vgathermwq(0, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgtb __builtin_HEXAGON_V6_vgtb(v64, v64); // CHECK: @llvm.hexagon.V6.vgtb.and - __builtin_HEXAGON_V6_vgtb_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtb_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtb.or - __builtin_HEXAGON_V6_vgtb_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtb_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtb.xor - __builtin_HEXAGON_V6_vgtb_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtb_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgth __builtin_HEXAGON_V6_vgth(v64, v64); // CHECK: @llvm.hexagon.V6.vgth.and - __builtin_HEXAGON_V6_vgth_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgth_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgth.or - __builtin_HEXAGON_V6_vgth_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgth_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgth.xor - __builtin_HEXAGON_V6_vgth_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgth_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtub __builtin_HEXAGON_V6_vgtub(v64, v64); // CHECK: @llvm.hexagon.V6.vgtub.and - __builtin_HEXAGON_V6_vgtub_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtub_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtub.or - __builtin_HEXAGON_V6_vgtub_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtub_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtub.xor - __builtin_HEXAGON_V6_vgtub_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtub_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // 
CHECK: @llvm.hexagon.V6.vgtuh __builtin_HEXAGON_V6_vgtuh(v64, v64); // CHECK: @llvm.hexagon.V6.vgtuh.and - __builtin_HEXAGON_V6_vgtuh_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuh_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuh.or - __builtin_HEXAGON_V6_vgtuh_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuh_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuh.xor - __builtin_HEXAGON_V6_vgtuh_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuh_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw __builtin_HEXAGON_V6_vgtuw(v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw.and - __builtin_HEXAGON_V6_vgtuw_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuw_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw.or - __builtin_HEXAGON_V6_vgtuw_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuw_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw.xor - __builtin_HEXAGON_V6_vgtuw_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuw_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtw __builtin_HEXAGON_V6_vgtw(v64, v64); // CHECK: @llvm.hexagon.V6.vgtw.and - __builtin_HEXAGON_V6_vgtw_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtw_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtw.or - __builtin_HEXAGON_V6_vgtw_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtw_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtw.xor - __builtin_HEXAGON_V6_vgtw_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtw_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vinsertwr __builtin_HEXAGON_V6_vinsertwr(v64, 0); // CHECK: @llvm.hexagon.V6.vlalignb @@ -416,14 +427,6 @@ void test() { __builtin_HEXAGON_V6_vlutvwh_oracci(v128, v64, v64, 0); // CHECK: @llvm.hexagon.V6.vlutvwhi __builtin_HEXAGON_V6_vlutvwhi(v64, v64, 0); - // CHECK: @llvm.hexagon.V6.vmaskedstorenq - __builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64); - // CHECK: @llvm.hexagon.V6.vmaskedstorentnq - __builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64); - // CHECK: @llvm.hexagon.V6.vmaskedstorentq - __builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64); - // CHECK: @llvm.hexagon.V6.vmaskedstoreq - __builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64); // CHECK: @llvm.hexagon.V6.vmaxb __builtin_HEXAGON_V6_vmaxb(v64, v64); // CHECK: @llvm.hexagon.V6.vmaxh @@ -567,7 +570,7 @@ void test() { // CHECK: @llvm.hexagon.V6.vmpyuhv.acc __builtin_HEXAGON_V6_vmpyuhv_acc(v128, v64, v64); // CHECK: @llvm.hexagon.V6.vmux - __builtin_HEXAGON_V6_vmux(q64, v64, v64); + __builtin_HEXAGON_V6_vmux(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vnavgb __builtin_HEXAGON_V6_vnavgb(v64, v64); // CHECK: @llvm.hexagon.V6.vnavgh @@ -603,11 +606,11 @@ void test() { // CHECK: @llvm.hexagon.V6.vpopcounth __builtin_HEXAGON_V6_vpopcounth(v64); // CHECK: @llvm.hexagon.V6.vprefixqb - __builtin_HEXAGON_V6_vprefixqb(q64); + __builtin_HEXAGON_V6_vprefixqb(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vprefixqh - __builtin_HEXAGON_V6_vprefixqh(q64); + __builtin_HEXAGON_V6_vprefixqh(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vprefixqw - __builtin_HEXAGON_V6_vprefixqw(q64); + __builtin_HEXAGON_V6_vprefixqw(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vrdelta __builtin_HEXAGON_V6_vrdelta(v64, v64); // CHECK: @llvm.hexagon.V6.vrmpybub.rtt @@ 
-677,19 +680,19 @@ void test() { // CHECK: @llvm.hexagon.V6.vscattermh.add __builtin_HEXAGON_V6_vscattermh_add(0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermhq - __builtin_HEXAGON_V6_vscattermhq(q64, 0, 0, v64, v64); + __builtin_HEXAGON_V6_vscattermhq(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermhw __builtin_HEXAGON_V6_vscattermhw(0, 0, v128, v64); // CHECK: @llvm.hexagon.V6.vscattermhw.add __builtin_HEXAGON_V6_vscattermhw_add(0, 0, v128, v64); // CHECK: @llvm.hexagon.V6.vscattermhwq - __builtin_HEXAGON_V6_vscattermhwq(q64, 0, 0, v128, v64); + __builtin_HEXAGON_V6_vscattermhwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v128, v64); // CHECK: @llvm.hexagon.V6.vscattermw __builtin_HEXAGON_V6_vscattermw(0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermw.add __builtin_HEXAGON_V6_vscattermw_add(0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermwq - __builtin_HEXAGON_V6_vscattermwq(q64, 0, 0, v64, v64); + __builtin_HEXAGON_V6_vscattermwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vsh __builtin_HEXAGON_V6_vsh(v64); // CHECK: @llvm.hexagon.V6.vshufeh @@ -715,9 +718,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubb.dv __builtin_HEXAGON_V6_vsubb_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vsubbnq - __builtin_HEXAGON_V6_vsubbnq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubbnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubbq - __builtin_HEXAGON_V6_vsubbq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubbq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubbsat __builtin_HEXAGON_V6_vsubbsat(v64, v64); // CHECK: @llvm.hexagon.V6.vsubbsat.dv @@ -729,9 +732,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubh.dv __builtin_HEXAGON_V6_vsubh_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vsubhnq - __builtin_HEXAGON_V6_vsubhnq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubhnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubhq - __builtin_HEXAGON_V6_vsubhq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubhq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubhsat __builtin_HEXAGON_V6_vsubhsat(v64, v64); // CHECK: @llvm.hexagon.V6.vsubhsat.dv @@ -761,15 +764,15 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubw.dv __builtin_HEXAGON_V6_vsubw_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vsubwnq - __builtin_HEXAGON_V6_vsubwnq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubwnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubwq - __builtin_HEXAGON_V6_vsubwq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubwsat __builtin_HEXAGON_V6_vsubwsat(v64, v64); // CHECK: @llvm.hexagon.V6.vsubwsat.dv __builtin_HEXAGON_V6_vsubwsat_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vswap - __builtin_HEXAGON_V6_vswap(q64, v64, v64); + __builtin_HEXAGON_V6_vswap(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vtmpyb __builtin_HEXAGON_V6_vtmpyb(v128, 0); // CHECK: @llvm.hexagon.V6.vtmpyb.acc diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c index 417caed494d9..babd3345d787 100644 --- a/clang/test/CodeGen/builtins-reduction-math.c +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -57,3 +57,14 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) { const si8 cvi1 = vi1; unsigned long long r5 = __builtin_reduce_min(cvi1); } + +void 
test_builtin_reduce_xor(si8 vi1, u4 vu1) { + + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_xor(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_xor(vu1); +} diff --git a/clang/test/CodeGen/ms-inline-asm-functions.c b/clang/test/CodeGen/ms-inline-asm-functions.c index c958d8803871..1a6ead9286df 100644 --- a/clang/test/CodeGen/ms-inline-asm-functions.c +++ b/clang/test/CodeGen/ms-inline-asm-functions.c @@ -39,7 +39,7 @@ int bar() { int baz() { // CHECK-LABEL: _baz: __asm mov eax, k; - // CHECK: movl k, %eax + // CHECK: movl _k, %eax __asm mov eax, kptr; // CHECK: movl _kptr, %eax } diff --git a/clang/test/CodeGenCXX/debug-info-objname.cpp b/clang/test/CodeGenCXX/debug-info-objname.cpp index d80d805b8b41..73d3bb4ec4eb 100644 --- a/clang/test/CodeGenCXX/debug-info-objname.cpp +++ b/clang/test/CodeGenCXX/debug-info-objname.cpp @@ -1,41 +1,41 @@ -// REQUIRES: x86-registered-target -// RUN: cp %s %T/debug-info-objname.cpp -// RUN: cd %T - -// No output file provided, input file is relative, we emit an absolute path (MSVC behavior). -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// No output file provided, input file is absolute, we emit an absolute path (MSVC behavior). -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -- %T/debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// The output file is provided as an absolute path, we emit an absolute path. -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc /Fo%T/debug-info-objname.obj -- %T/debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// The output file is provided as relative path, -working-dir is provided, we emit an absolute path. -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -working-dir=%T debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// The input file name is relative and we specify -fdebug-compilation-dir, we emit a relative path. -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=RELATIVE - -// Ensure /FA emits an .asm file which contains the path to the final .obj, not the .asm -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. /FA debug-info-objname.cpp -// RUN: cat debug-info-objname.asm | FileCheck %s --check-prefix=ASM - -// Same thing for -save-temps -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. 
/clang:-save-temps debug-info-objname.cpp -// RUN: cat debug-info-objname.asm | FileCheck %s --check-prefix=ASM - -int main() { - return 1; -} - -// ABSOLUTE: S_OBJNAME [size = {{[0-9]+}}] sig=0, `{{.+}}debug-info-objname.obj` -// RELATIVE: S_OBJNAME [size = {{[0-9]+}}] sig=0, `debug-info-objname.obj` -// ASM: Record kind: S_OBJNAME -// ASM-NEXT: .long 0 -// ASM-NEXT: .asciz "debug-info-objname.obj" +// REQUIRES: x86-registered-target +// RUN: rm -rf %t && mkdir %t && cd %t +// RUN: cp %s debug-info-objname.cpp + +/// No output file provided, input file is relative, we emit an absolute path (MSVC behavior). +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// No output file provided, input file is absolute, we emit an absolute path (MSVC behavior). +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -- %t/debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// The output file is provided as an absolute path, we emit an absolute path. +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc /Fo%t/debug-info-objname.obj -- %t/debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// The output file is provided as relative path, -working-dir is provided, we emit an absolute path. +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -working-dir=%t debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// The input file name is relative and we specify -fdebug-compilation-dir, we emit a relative path. +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=RELATIVE + +/// Ensure /FA emits an .asm file which contains the path to the final .obj, not the .asm +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. /FA debug-info-objname.cpp +// RUN: FileCheck --input-file=debug-info-objname.asm --check-prefix=ASM %s + +/// Same thing for -save-temps +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. /clang:-save-temps debug-info-objname.cpp +// RUN: FileCheck --input-file=debug-info-objname.asm --check-prefix=ASM %s + +int main() { + return 1; +} + +// ABSOLUTE: S_OBJNAME [size = [[#]]] sig=0, `{{.+}}debug-info-objname.obj` +// RELATIVE: S_OBJNAME [size = [[#]]] sig=0, `debug-info-objname.obj` +// ASM: Record kind: S_OBJNAME +// ASM-NEXT: .long 0 +// ASM-NEXT: .asciz "debug-info-objname.obj" diff --git a/clang/test/Driver/hexagon-hvx-ieee-fp.c b/clang/test/Driver/hexagon-hvx-ieee-fp.c new file mode 100644 index 000000000000..cd6e11ef06fe --- /dev/null +++ b/clang/test/Driver/hexagon-hvx-ieee-fp.c @@ -0,0 +1,25 @@ +// ----------------------------------------------------------------------------- +// Tests for the hvx ieee fp feature and errors. 
+// ----------------------------------------------------------------------------- + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-IEEEFP %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx=v68 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-IEEEFP %s +// CHECK-IEEEFP: "-target-feature" "+hvx-ieee-fp" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-ieee-fp \ +// RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-NO-IEEEFP %s +// CHECK-NO-IEEEFP: "-target-feature" "-hvx-ieee-fp" + +// IEEE-FP is valid only on hvxv68 and hvxv68+. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx=v66 \ +// RUN: -mhvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// CHECK-ERROR1: error: -mhvx-ieee-fp is not supported on HVX v66 + +// IEEE-FP is valid only if HVX is enabled. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR2 %s +// CHECK-ERROR2: error: -mhvx-ieee-fp requires HVX, use -mhvx/-mhvx= to enable it diff --git a/clang/test/Driver/hexagon-hvx-qfloat.c b/clang/test/Driver/hexagon-hvx-qfloat.c new file mode 100644 index 000000000000..fc647f7b06ab --- /dev/null +++ b/clang/test/Driver/hexagon-hvx-qfloat.c @@ -0,0 +1,25 @@ +// ----------------------------------------------------------------------------- +// Tests for the hvx qfloat feature and errors. +// ----------------------------------------------------------------------------- + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-QFLOAT %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx=v68 -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-QFLOAT %s +// CHECK-QFLOAT: "-target-feature" "+hvx-qfloat" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-qfloat \ +// RUN: -mno-hvx-qfloat 2>&1 | FileCheck -check-prefix=CHECK-NO-QFLOAT %s +// CHECK-NO-QFLOAT: "-target-feature" "-hvx-qfloat" + +// QFloat is valid only on hvxv68+. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx=v66 \ +// RUN: -mhvx-qfloat 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// CHECK-ERROR1: error: -mhvx-qfloat is not supported on HVX v66 + +// QFloat is valid only if HVX is enabled. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR2 %s +// CHECK-ERROR2: error: -mhvx-qfloat requires HVX, use -mhvx/-mhvx= to enable it diff --git a/clang/test/Driver/hexagon-hvx.c b/clang/test/Driver/hexagon-hvx.c index 4642aef81a31..10bb8fe9327f 100644 --- a/clang/test/Driver/hexagon-hvx.c +++ b/clang/test/Driver/hexagon-hvx.c @@ -2,94 +2,227 @@ // Tests for the hvx features and warnings. 
// ----------------------------------------------------------------------------- -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX166 %s -// CHECKHVX166: "-target-feature" "+hvxv66" - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX165 %s -// CHECKHVX165: "-target-feature" "+hvxv65" +// No HVX without -mhvx/-mhvx= + +// CHECK-HVX-ON: "-target-feature" "+hvx +// CHECK-HVX-ON-NOT: "-target-feature" "-hvx +// CHECK-HVX-OFF-NOT: "-target-feature" "+hvx + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv5 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv55 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67t \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s + +// Infer HVX version from flag: + +// CHECK-HVX-V60: "-target-feature" "+hvxv60" +// CHECK-HVX-V62: "-target-feature" "+hvxv62" +// CHECK-HVX-V65: "-target-feature" "+hvxv65" +// CHECK-HVX-V66: "-target-feature" "+hvxv66" +// CHECK-HVX-V67: "-target-feature" "+hvxv67" +// CHECK-HVX-V68: "-target-feature" "+hvxv68" +// CHECK-HVX-V69: "-target-feature" "+hvxv69" + +// Direct version flag: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Direct version flag with different CPU version: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 -mv62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 -mv65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 -mv66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf 
-mhvx=v66 -mv67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 -mv68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 -mv69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mv60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Direct version flag with different CPU version and versionless -mhvx: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 -mv62 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 -mv65 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 -mv66 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 -mv67 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 -mv68 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 -mv69 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mv60 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Direct version flag with different CPU version, versionless -mhvx +// and -mno-hvx. The -mno-hvx cancels -mhvx=, versionless -mhvx wins: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 -mno-hvx -mv62 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 -mno-hvx -mv65 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 -mno-hvx -mv66 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 -mno-hvx -mv67 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 -mno-hvx -mv68 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 -mno-hvx -mv69 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mno-hvx -mv60 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s + +// Direct version flag with different CPU version, versionless -mhvx +// and -mno-hvx. 
The -mno-hvx cancels versionless -mhvx, -mhvx= wins: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mno-hvx -mhvx=v60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx -mno-hvx -mhvx=v62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mno-hvx -mhvx=v65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67 -mhvx -mno-hvx -mhvx=v66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mno-hvx -mhvx=v67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx -mhvx=v68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx -mno-hvx -mhvx=v69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Infer HVX version from CPU version: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX162 %s -// CHECKHVX162: "-target-feature" "+hvxv62" - +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ -// RUN: -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \ -// RUN: -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \ -// RUN: -mhvx-length=128b 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s -// CHECKHVX2-NOT: "-target-feature" "+hvx-length64b" -// CHECKHVX2: "-target-feature" "+hvx-length128b" - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECKHVX3 %s - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECKHVX3 %s -// CHECKHVX3-NOT: "-target-feature" "+hvx - -// No hvx target feature must be added if -mno-hvx occurs last -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mno-hvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NOHVX %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mno-hvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NOHVX %s -// CHECK-NOHVX-NOT: "-target-feature" "+hvx - -// Hvx target feature should be added if -mno-hvx doesn't occur last -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mno-hvx -mhvx\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXFEAT %s -// CHECK-HVXFEAT: "-target-feature" "+hvxv62" - -// With -mhvx, the version of hvx defaults to Cpu -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-DEFAULT %s -// CHECK-HVX-DEFAULT: "-target-feature" "+hvxv60" - -// Test -mhvx= flag -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx=v62 \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXEQ %s -// CHECK-HVXEQ: "-target-feature" "+hvxv62" - -// Honor the last occurred -mhvx=, -mhvx flag. 
-// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx=v62 -mhvx\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXEQ-PRE %s -// CHECK-HVXEQ-PRE-NOT: "-target-feature" "+hvxv62" -// CHECK-HVXEQ-PRE: "-target-feature" "+hvxv60" -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx -mhvx=v62\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXEQ-PRE2 %s -// CHECK-HVXEQ-PRE2-NOT: "-target-feature" "+hvxv60" -// CHECK-HVXEQ-PRE2: "-target-feature" "+hvxv62" - -// Test -mhvx-length flag -// The default mode on v60,v62 is 64B. -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: -mhvx-length=64b 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: -mhvx-length=64B 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s -// CHECK-HVXLENGTH-64B: "-target-feature" "+hvx{{.*}}" "-target-feature" "+hvx-length64b" -// The default mode on v66 and future archs is 128B. +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-128B %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mhvx-length=128B\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-128B %s -// CHECK-HVXLENGTH-128B: "-target-feature" "+hvx{{.*}}" "-target-feature" "+hvx-length128b" - -// Bail out if -mhvx-length is specified without HVX enabled -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx-length=64B \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-ERROR %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx-length=128B \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-ERROR %s -// CHECK-HVXLENGTH-ERROR: error: -mhvx-length is not supported without a -mhvx/-mhvx= flag - -// Error out if an unsupported value is passed to -mhvx-length. 
+// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67t -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Infer HVX length from flag: + +// CHECK-HVX-L64: "-target-feature" "+hvx-length64b" +// CHECK-HVX-L64-NOT: "-target-feature" "+hvx-length128b" +// CHECK-HVX-L128: "-target-feature" "+hvx-length128b" +// CHECK-HVX-L128-NOT: "-target-feature" "+hvx-length64b" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=64b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=128b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s + +// Infer HVX length from HVX version: + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s + +// No HVX with trailing -mno-hvx + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mhvx-length=128b -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-qfloat -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s + +// Float + +// CHECK-HVX-QFLOAT-ON: "-target-feature" "+hvx-qfloat" +// CHECK-HVX-QFLOAT-OFF-NOT: "-target-feature" "+hvx-qfloat" +// CHECK-HVX-IEEE-ON: "-target-feature" "+hvx-ieee-fp" +// CHECK-HVX-IEEE-OFF-NOT: "-target-feature" "+hvx-ieee-fp" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-qfloat -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-qfloat -mno-hvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-OFF %s + +// RUN: %clang -c %s -### -target hexagon-unknown-elf 
-mv69 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-ieee-fp -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-ieee-fp -mno-hvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-OFF %s + +// HVX flags heed HVX: + +// CHECK-NEEDS-HVX: error: {{.*}} requires HVX, use -mhvx/-mhvx= to enable it + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx-length=64b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx-length=128b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s + +// Invalid HVX length: + +// CHECK-HVX-BAD-LENGTH: error: unsupported argument '{{.*}}' to option 'mhvx-length=' + // RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=B \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-VALUE-ERROR %s +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-BAD-LENGTH %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=128 \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-VALUE-ERROR %s -// CHECK-HVXLENGTH-VALUE-ERROR: error: unsupported argument '{{.*}}' to option 'mhvx-length=' +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-BAD-LENGTH %s diff --git a/clang/test/Driver/hexagon-toolchain-elf.c b/clang/test/Driver/hexagon-toolchain-elf.c index cc11f9fcba9e..d7c8015547e4 100644 --- a/clang/test/Driver/hexagon-toolchain-elf.c +++ b/clang/test/Driver/hexagon-toolchain-elf.c @@ -151,6 +151,22 @@ // CHECK02B: "-cc1" {{.*}} "-target-cpu" "hexagonv67t" // CHECK02B: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v67t/crt0 +// RUN: %clang -### -target hexagon-unknown-elf \ +// RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ +// RUN: -mcpu=hexagonv68 -fuse-ld=hexagon-link\ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK02C %s +// CHECK02C: "-cc1" {{.*}} "-target-cpu" "hexagonv68" +// CHECK02C: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v68/crt0 + +// RUN: %clang -### -target hexagon-unknown-elf \ +// RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ +// RUN: -mcpu=hexagonv69 -fuse-ld=hexagon-link\ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK02D %s +// CHECK02D: "-cc1" {{.*}} "-target-cpu" "hexagonv69" +// CHECK02D: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v69/crt0 + // ----------------------------------------------------------------------------- // Test Linker related args // ----------------------------------------------------------------------------- diff --git a/clang/test/Driver/hexagon-vectorize.c b/clang/test/Driver/hexagon-vectorize.c index dcd6a09222eb..6a142e30cd94 100644 --- a/clang/test/Driver/hexagon-vectorize.c +++ b/clang/test/Driver/hexagon-vectorize.c @@ -6,4 +6,4 @@ // CHECK-DEFAULT-NOT: hexagon-autohvx // CHECK-VECTOR: "-mllvm" "-hexagon-autohvx" // CHECK-NOVECTOR-NOT: 
hexagon-autohvx -// CHECK-NEEDHVX: warning: auto-vectorization requires HVX, use -mhvx to enable it +// CHECK-NEEDHVX: warning: auto-vectorization requires HVX, use -mhvx/-mhvx= to enable it diff --git a/clang/test/Driver/spirv-toolchain.cl b/clang/test/Driver/spirv-toolchain.cl new file mode 100644 index 000000000000..4be08f127290 --- /dev/null +++ b/clang/test/Driver/spirv-toolchain.cl @@ -0,0 +1,65 @@ +// Check object emission. +// RUN: %clang -### --target=spirv64 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s + +// SPV64: "-cc1" "-triple" "spirv64" +// SPV64-SAME: "-o" [[BC:".*bc"]] +// SPV64: {{llvm-spirv.*"}} [[BC]] "-o" {{".*o"}} + +// RUN: %clang -### --target=spirv32 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s + +// SPV32: "-cc1" "-triple" "spirv32" +// SPV32-SAME: "-o" [[BC:".*bc"]] +// SPV32: {{llvm-spirv.*"}} [[BC]] "-o" {{".*o"}} + +//----------------------------------------------------------------------------- +// Check Assembly emission. +// RUN: %clang -### --target=spirv64 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### --target=spirv64 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### --target=spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s + +// SPT64: "-cc1" "-triple" "spirv64" +// SPT64-SAME: "-o" [[BC:".*bc"]] +// SPT64: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" {{".*s"}} + +// RUN: %clang -### --target=spirv32 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### --target=spirv32 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### --target=spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s + +// SPT32: "-cc1" "-triple" "spirv32" +// SPT32-SAME: "-o" [[BC:".*bc"]] +// SPT32: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" {{".*s"}} + +//----------------------------------------------------------------------------- +// Check assembly input -> object output +// RUN: %clang -### --target=spirv64 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s +// RUN: %clang -### --target=spirv32 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s +// ASM: {{llvm-spirv.*"}} {{".*"}} "-to-binary" "-o" {{".*o"}} + +//----------------------------------------------------------------------------- +// Check --save-temps. 
+// RUN: %clang -### --target=spirv64 -x cl -c %s --save-temps 2>&1 | FileCheck --check-prefix=TMP %s + +// TMP: "-cc1" "-triple" "spirv64" +// TMP-SAME: "-E" +// TMP-SAME: "-o" [[I:".*i"]] +// TMP: "-cc1" "-triple" "spirv64" +// TMP-SAME: "-o" [[BC:".*bc"]] +// TMP-SAME: [[I]] +// TMP: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" [[S:".*s"]] +// TMP: {{llvm-spirv.*"}} [[S]] "-to-binary" "-o" {{".*o"}} + +//----------------------------------------------------------------------------- +// Check that warning occurs if multiple input files are passed. +// RUN: %clang -### --target=spirv64 %s %s 2>&1 | FileCheck --check-prefix=WARN %s + +// WARN: warning: Linking multiple input files is not supported for SPIR-V yet diff --git a/clang/test/Driver/unsupported-outline-atomics.c b/clang/test/Driver/unsupported-outline-atomics.c new file mode 100644 index 000000000000..2e88528c2ec6 --- /dev/null +++ b/clang/test/Driver/unsupported-outline-atomics.c @@ -0,0 +1,15 @@ +// RUN: %clang -target x86_64 -moutline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-OUTLINE-ATOMICS-X86 +// CHECK-OUTLINE-ATOMICS-X86: warning: 'x86_64' does not support '-moutline-atomics'; flag ignored [-Woption-ignored] +// CHECK-OUTLINE-ATOMICS-X86-NOT: "-target-feature" "+outline-atomics" + +// RUN: %clang -target x86_64 -mno-outline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-NO-OUTLINE-ATOMICS-X86 +// CHECK-NO-OUTLINE-ATOMICS-X86: warning: 'x86_64' does not support '-mno-outline-atomics'; flag ignored [-Woption-ignored] +// CHECK-NO-OUTLINE-ATOMICS-X86-NOT: "-target-feature" "-outline-atomics" + +// RUN: %clang -target riscv64 -moutline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-OUTLINE-ATOMICS-RISCV +// CHECK-OUTLINE-ATOMICS-RISCV: warning: 'riscv64' does not support '-moutline-atomics'; flag ignored [-Woption-ignored] +// CHECK-OUTLINE-ATOMICS-RISCV-NOT: "-target-feature" "+outline-atomics" + +// RUN: %clang -target riscv64 -mno-outline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-NO-OUTLINE-ATOMICS-RISCV +// CHECK-NO-OUTLINE-ATOMICS-RISCV: warning: 'riscv64' does not support '-mno-outline-atomics'; flag ignored [-Woption-ignored] +// CHECK-NO-OUTLINE-ATOMICS-RISCV-NOT: "-target-feature" "-outline-atomics" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 1b436d8d904f..04f5a7532b42 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -69,7 +69,7 @@ // RUN: not %clang_cc1 -triple hexagon--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix HEXAGON // HEXAGON: error: unknown target CPU 'not-a-cpu' -// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68{{$}} +// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68, hexagonv69{{$}} // RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF // BPF: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm b/clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm new file mode 100644 index 000000000000..ee41b5374613 --- /dev/null +++ b/clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm @@ -0,0 +1,8 @@ +export module Templ; +export template <class T>
+class G { +public: + T operator()() { + return T(); + } +}; diff --git a/clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm b/clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm new file mode 100644 index 000000000000..63cd595a642d --- /dev/null +++ b/clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm @@ -0,0 +1,6 @@ +export module bar; +import Templ; +export template <class T> +int bar() { + return G<T>()(); +} diff --git a/clang/test/Modules/concept.cppm b/clang/test/Modules/concept.cppm index e14a158644af..f171e6d08237 100644 --- a/clang/test/Modules/concept.cppm +++ b/clang/test/Modules/concept.cppm @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir %t -// RUN: %clang -std=c++20 %S/Inputs/concept/A.cppm --precompile -o %t/A.pcm -// RUN: %clang -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs/concept %s -c -Xclang -verify +// RUN: %clang_cc1 -x c++ -std=c++20 %S/Inputs/concept/A.cppm -emit-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs/concept %s -fsyntax-only -verify // expected-no-diagnostics module; diff --git a/clang/test/Modules/module-transtive-instantiation-2.cpp b/clang/test/Modules/module-transtive-instantiation-2.cpp new file mode 100644 index 000000000000..b058cd77f436 --- /dev/null +++ b/clang/test/Modules/module-transtive-instantiation-2.cpp @@ -0,0 +1,11 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify + +import bar; +int foo() { + G<int> g; // expected-error {{declaration of 'G' must be imported from module 'Templ' before it is required}} + return g(); // expected-note@Inputs/module-transtive-instantiation/Templ.cppm:3 {{declaration here is not visible}} +} diff --git a/clang/test/Modules/module-transtive-instantiation.cpp b/clang/test/Modules/module-transtive-instantiation.cpp new file mode 100644 index 000000000000..b44f0bbfdf39 --- /dev/null +++ b/clang/test/Modules/module-transtive-instantiation.cpp @@ -0,0 +1,11 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify +// expected-no-diagnostics + +import bar; +int foo() { + return bar<int>(); +} diff --git a/clang/test/Modules/odr_using_dependent_name.cppm b/clang/test/Modules/odr_using_dependent_name.cppm index 8d7ea9f57bed..c2938855fdbe 100644 --- a/clang/test/Modules/odr_using_dependent_name.cppm +++ b/clang/test/Modules/odr_using_dependent_name.cppm @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: %clang -std=c++20 %S/Inputs/odr_using_dependent_name/X.cppm --precompile -o %t/X.pcm -// RUN: %clang -std=c++20 -I%S/Inputs/odr_using_dependent_name -fprebuilt-module-path=%t %s --precompile -c +// RUN: %clang_cc1 -std=c++20 %S/Inputs/odr_using_dependent_name/X.cppm -emit-module-interface -o %t/X.pcm +// RUN: %clang_cc1 -std=c++20 -I%S/Inputs/odr_using_dependent_name -fprebuilt-module-path=%t %s -emit-module-interface -fsyntax-only -verify // expected-no-diagnostics
module; #include "foo.h" diff --git a/clang/test/OpenMP/atomic_messages.cpp b/clang/test/OpenMP/atomic_messages.cpp index 4a8ca7ba82ce..608847f87dad 100644 --- a/clang/test/OpenMP/atomic_messages.cpp +++ b/clang/test/OpenMP/atomic_messages.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -ferror-limit 150 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50,omp51 -fopenmp -fopenmp-version=51 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -ferror-limit 150 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50,omp51 -fopenmp-simd -fopenmp-version=51 -ferror-limit 150 %s -Wuninitialized int foo() { L1: @@ -896,19 +898,19 @@ int relaxed() { template T mixed() { T a, b = T(); -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'read' clause used here}} #pragma omp atomic read write a = b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'write' clause used here}} #pragma omp atomic write read a = b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'update' clause used here}} #pragma omp atomic update read a += b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'capture' clause used here}} #pragma omp atomic capture read a = ++b; @@ -917,19 +919,19 @@ T mixed() { int mixed() { int a, b = 0; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'read' clause used here}} #pragma omp atomic read write a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write read a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 
{{'write' clause used here}} #pragma omp atomic write update a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write capture a = b; @@ -937,3 +939,14 @@ int mixed() { return mixed(); } +#if _OPENMP >= 202011 +int compare() { + int a, b, c; +// omp51-error@+1 {{atomic compare is not supported for now}} +#pragma omp atomic compare + { + if (a == b) + a = c; + } +} +#endif diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp index cd09f80d5580..c66cc0b384da 100644 --- a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -16,17 +16,17 @@ void a() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META10:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, { float, float }* [[B]], align 4, !dbg [[DBG13:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to { float, float }*, !dbg [[DBG13]] -// CHECK1-NEXT: store { float, float } [[TMP0]], { float, float }* [[CONV]], align 4, !dbg [[DBG13]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG13]] -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG13]] -// CHECK1-NEXT: ret void, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, { float, float }* [[B]], align 4, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to { float, float }*, !dbg [[DBG14]] +// CHECK1-NEXT: store { float, float } [[TMP0]], { float, float }* [[CONV]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG14]] +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG14]] +// CHECK1-NEXT: ret void, !dbg [[DBG15:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], <2 x float> [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG15:![0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], <2 x float> [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG16:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -34,31 +34,30 @@ void a() { // CHECK1-NEXT: [[TMP0:%.*]] = bitcast { float, float }* [[B]] to <2 x float>* // CHECK1-NEXT: store <2 x float> [[B_COERCE]], <2 x float>* [[TMP0]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG28:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG29:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG29:![0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG30:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[B_ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to { float, float }*, !dbg [[DBG37:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CONV]] to <2 x float>*, !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 8, !dbg [[DBG37]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP0]], i32* [[TMP1]], <2 x float> [[TMP3]]) #[[ATTR4:[0-9]+]], !dbg [[DBG37]] -// CHECK1-NEXT: ret void, !dbg [[DBG37]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[B_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to { float, float }*, !dbg [[DBG38:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CONV]] to <2 x float>*, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4, !dbg [[DBG38]] +// CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP0]], i32* [[TMP1]], <2 x float> [[TMP3]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]] +// CHECK1-NEXT: ret void, !dbg [[DBG38]] // -// \ No newline at end of file diff --git a/clang/test/OpenMP/debug_task_shared.c b/clang/test/OpenMP/debug_task_shared.c new file mode 100644 index 000000000000..c3a23bc48632 --- /dev/null +++ b/clang/test/OpenMP/debug_task_shared.c @@ -0,0 +1,55 @@ +// This testcase checks emission of debug info for variables +// inside shared clause of task construct. 
+ +// REQUIRES: x86_64-linux + +// RUN: %clang_cc1 -debug-info-kind=constructor -DSHARED -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -debug-info-kind=constructor -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=NEG +// expected-no-diagnostics + +// CHECK-LABEL: define internal i32 @.omp_task_entry. + +// CHECK-DAG: [[CONTEXT:%[0-9]+]] = load %struct.anon*, %struct.anon** %__context.addr.i, align 8 +// CHECK-DAG: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata [[SHARE2:![0-9]+]], metadata !DIExpression(DW_OP_deref)) +// CHECK-DAG: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata [[SHARE3:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) +// CHECK-DAG: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata [[SHARE1:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 16, DW_OP_deref)) + +// CHECK-DAG: [[SHARE2]] = !DILocalVariable(name: "share2" +// CHECK-DAG: [[SHARE3]] = !DILocalVariable(name: "share3" +// CHECK-DAG: [[SHARE1]] = !DILocalVariable(name: "share1" + +// NEG-LABEL: define internal i32 @.omp_task_entry. +// NEG: [[CONTEXT:%[0-9]+]] = load %struct.anon*, %struct.anon** %__context.addr.i, align 8 +// NEG-NOT: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata {{![0-9]+}}, metadata !DIExpression(DW_OP_deref)) + +extern int printf(const char *, ...); + +int foo(int n) { + int share1 = 9, share2 = 11, share3 = 13, priv1, priv2, fpriv; + fpriv = n + 4; + + if (n < 2) + return n; + else { +#if SHARED +#pragma omp task shared(share1, share2) private(priv1, priv2) firstprivate(fpriv) shared(share3) +#else +#pragma omp task private(priv1, priv2) firstprivate(fpriv) +#endif + { + priv1 = n; + priv2 = n + 2; + share2 += share3; + printf("share1 = %d, share2 = %d, share3 = %d\n", share1, share2, share3); + share1 = priv1 + priv2 + fpriv + foo(n - 1) + share2 + share3; + } +#pragma omp taskwait + return share1 + share2 + share3; + } +} + +int main() { + int n = 10; + printf("foo(%d) = %d\n", n, foo(n)); + return 0; +} diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index d1a3f33c33d9..1ed8db15f979 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -1969,7 +1969,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -2482,7 +2482,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -3761,7 +3761,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -4274,7 +4274,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9456,7 +9456,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -9949,7 +9949,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11673,7 +11673,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -12166,7 +12166,7 @@ 
int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13885,7 +13885,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -14378,7 +14378,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16102,7 +16102,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -16595,7 +16595,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp index ee085daa60a5..4eda6d07645b 100644 --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -489,8 +489,8 @@ int main() { 
// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 // CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -749,8 +749,8 @@ int main() { // CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 // CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 4 +// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -1702,7 +1702,7 @@ int main() { // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] @@ -2150,7 +2150,7 @@ int main() { // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -2640,7 +2640,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] @@ -3088,7 +3088,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load 
i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -5006,4 +5006,3 @@ int main() { // CHECK11-NEXT: call void @__tgt_register_requires(i64 1) // CHECK11-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp index dec264ff9ea1..21c59d5ae125 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -62,7 +62,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 @@ -681,7 +681,7 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK2-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index 60b4685ed9b4..9c21a8789bc0 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -2077,7 +2077,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -2638,7 +2638,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4037,7 +4037,7 @@ int main() { // 
CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -4598,7 +4598,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10344,7 +10344,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -10885,7 +10885,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12714,7 +12714,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -13255,7 +13255,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -15094,7 +15094,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -15635,7 +15635,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17464,7 +17464,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -18005,7 +18005,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp index ba48754a6b48..887117773d6e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -374,40 +374,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP21]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8 +// CHECK1-NEXT: store double [[TMP21]], double* [[CONV]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP24]], double* [[CONV9]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 +// CHECK1-NEXT: store double [[TMP24]], double* [[CONV9]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV10:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP28]], float* [[CONV11]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK1-NEXT: store float [[TMP28]], float* [[CONV11]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -483,38 +483,38 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 4, !llvm.access.group !8 +// 
CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -648,40 +648,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // 
CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP21]], double* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8 +// CHECK2-NEXT: store double [[TMP21]], double* [[CONV]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP24]], double* [[CONV9]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 +// CHECK2-NEXT: store double [[TMP24]], double* [[CONV9]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV10:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV11:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK2-NEXT: store float [[TMP28]], float* [[CONV11]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK2-NEXT: store float [[TMP28]], float* [[CONV11]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -757,38 +757,38 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 +// CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load double*, double** 
[[TMP]], align 8 -// CHECK2-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 +// CHECK2-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -929,33 +929,33 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_CASTED]] to double* -// CHECK3-NEXT: store double [[TMP20]], double* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = 
load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK3-NEXT: store double [[TMP20]], double* [[CONV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CONV9:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP24]], float* [[CONV9]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK3-NEXT: store float [[TMP24]], float* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1031,38 +1031,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP13]], align 4 +// CHECK3-NEXT: store double* [[G3]], double** [[TMP13]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: 
@@ -1203,33 +1203,33 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_CASTED]] to double* -// CHECK4-NEXT: store double [[TMP20]], double* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK4-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK4-NEXT: store double [[TMP20]], double* [[CONV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CONV9:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK4-NEXT: store float [[TMP24]], float* [[CONV9]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK4-NEXT: store float [[TMP24]], float* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1305,38 +1305,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4 -// CHECK4-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4 -// CHECK4-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double* [[G3]], double** [[TMP13]], align 4 +// CHECK4-NEXT: store double* [[G3]], double** [[TMP13]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP15:%.*]] = load double*, 
double** [[TMP]], align 4 -// CHECK4-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4 +// CHECK4-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1661,34 +1661,34 @@ int main() { // CHECK8-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 
-// CHECK8-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4 +// CHECK8-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4 -// CHECK8-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]) +// CHECK8-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]), !llvm.access.group !5 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -1807,37 +1807,37 @@ int main() { // CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP25]] to i64 // CHECK8-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i64 0, i64 [[IDXPROM11]] // CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* // CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -2130,30 +2130,30 @@ int main() { // CHECK8-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: 
[[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]) +// CHECK8-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -2269,37 +2269,37 @@ int main() { // CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK8-NEXT: br i1 
[[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP25]] to i64 // CHECK8-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !17 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -2627,34 +2627,34 @@ int main() { // CHECK9-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CMP10:%.*]] = 
icmp sle i32 [[TMP22]], [[TMP23]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]) +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2773,37 +2773,37 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = 
getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i64 0, i64 [[IDXPROM11]] // CHECK9-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3096,30 +3096,30 @@ int main() { // CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]) +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3235,37 +3235,37 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK9-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3589,30 +3589,30 @@ int main() { // CHECK10-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4 -// CHECK10-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]) +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]), !llvm.access.group !6 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3727,35 +3727,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 [[TMP25]] // CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !10 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4046,27 +4046,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
-// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]) +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4179,35 +4179,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP25]] // CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4531,30 +4531,30 @@ int main() { // CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4 -// CHECK11-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4669,35 +4669,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], 
i32 0, i32 [[TMP25]] // CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* // CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4988,27 +4988,27 @@ int main() { // CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -5121,35 +5121,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = 
load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP25]] // CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp index 67a1d863a6fe..a5c1ca79380a 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -184,22 +184,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -261,22 +261,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -336,26 +336,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -417,23 +417,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: 
omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -547,22 +547,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -624,23 +624,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -700,26 +700,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -781,23 +781,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -863,35 +863,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -953,23 +953,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1081,22 +1081,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1158,23 +1158,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1234,26 +1234,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1315,23 +1315,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1397,35 +1397,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1487,23 +1487,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1593,22 +1593,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1670,22 +1670,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1745,26 +1745,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1826,23 +1826,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK2-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1956,22 +1956,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2033,23 +2033,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2109,26 +2109,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2190,23 +2190,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2272,35 +2272,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2362,23 +2362,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2490,22 +2490,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2567,23 +2567,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2643,26 +2643,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2724,23 +2724,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2806,35 +2806,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2896,23 +2896,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3002,22 +3002,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3079,22 +3079,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3154,26 +3154,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3235,23 +3235,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK3-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3365,22 +3365,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3442,23 +3442,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3693,41 +3693,41 @@ int main() { // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then6: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END23:%.*]] @@ -3817,7 +3817,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -3839,23 +3839,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -3945,7 +3945,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -3967,23 +3967,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_IF_END:%.*]] @@ -4138,22 +4138,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4215,23 +4215,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4454,35 +4454,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4544,23 +4544,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4650,22 +4650,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4727,22 +4727,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4802,26 +4802,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4883,23 +4883,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK4-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5013,22 +5013,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5090,23 +5090,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5341,41 +5341,41 @@ int main() { // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then6: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END23:%.*]] @@ -5465,7 +5465,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5487,23 +5487,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5593,7 +5593,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5615,23 +5615,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label 
[[OMP_IF_END:%.*]] @@ -5786,22 +5786,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5863,23 +5863,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6102,35 +6102,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6192,23 +6192,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6250,22 +6250,22 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6275,23 +6275,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z9gtid_testv() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, 
!llvm.access.group !6 +// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6324,23 +6324,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn4v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6350,23 +6350,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, 
!llvm.access.group !12 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn5v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6376,23 +6376,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn6v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6427,23 +6427,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn1v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6453,23 +6453,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn2v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6479,23 +6479,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // 
CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn3v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6521,22 +6521,22 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: 
omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6546,23 +6546,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z9gtid_testv() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6595,23 +6595,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn4v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 
// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6621,23 +6621,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn5v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6647,23 +6647,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn6v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: call void 
@_Z3fn6v(), !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6698,23 +6698,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn1v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6724,23 +6724,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // 
CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn2v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6750,23 +6750,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn3v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6792,22 +6792,22 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !2 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6817,23 +6817,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK7-NEXT: call void @_Z9gtid_testv() +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6867,23 +6867,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load 
i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn4v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6927,23 +6927,23 @@ int main() { // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn6v() +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7003,23 
+7003,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn1v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7055,23 +7055,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn3v() +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// 
CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7097,22 +7097,22 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7122,23 +7122,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK8-NEXT: call void @_Z9gtid_testv() +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7172,23 +7172,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn4v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7232,23 +7232,23 @@ int main() { // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn6v() +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, 
!llvm.access.group !14 +// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7308,23 +7308,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn1v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7360,23 +7360,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, 
i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn3v() +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7450,22 +7450,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7527,22 +7527,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7602,26 +7602,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7683,23 +7683,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK9-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7813,22 +7813,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7890,23 +7890,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7966,26 +7966,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8047,23 +8047,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !39 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8129,35 +8129,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8219,23 +8219,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8347,22 +8347,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8424,23 +8424,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8500,26 +8500,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8581,23 +8581,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !57 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8663,35 +8663,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8753,23 +8753,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8859,22 +8859,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8936,22 +8936,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9011,26 +9011,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9092,23 +9092,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9222,22 +9222,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9299,23 +9299,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9375,26 +9375,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9456,23 +9456,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9538,35 +9538,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9628,23 +9628,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9756,22 +9756,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9833,23 +9833,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9909,26 +9909,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9990,23 +9990,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10072,35 +10072,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10162,23 +10162,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10268,22 +10268,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10345,22 +10345,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10420,26 +10420,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10501,23 +10501,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10631,22 +10631,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10708,23 +10708,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10959,41 +10959,41 @@ int main() { // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then6: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END23:%.*]] @@ -11083,7 +11083,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11105,23 +11105,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11211,7 +11211,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11233,23 +11233,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11404,22 +11404,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11481,23 +11481,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11720,35 +11720,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11810,23 +11810,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11916,22 +11916,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11993,22 +11993,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12068,26 +12068,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12149,23 +12149,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12279,22 +12279,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12356,23 +12356,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12607,41 +12607,41 @@ int main() { // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK12-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK12-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then6: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END23:%.*]] @@ -12731,7 +12731,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -12753,23 +12753,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// 
CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -12859,7 +12859,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -12881,23 +12881,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13052,22 +13052,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13129,23 +13129,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13368,35 +13368,35 @@ int main() { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13458,23 +13458,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: 
omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13516,22 +13516,22 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13541,23 +13541,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// 
CHECK13-NEXT: call void @_Z9gtid_testv() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13590,23 +13590,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn4v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13616,23 +13616,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // 
CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn5v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13642,23 +13642,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn6v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13693,23 +13693,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn1v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13719,23 +13719,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn2v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], 
!llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13745,23 +13745,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn3v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13787,22 +13787,22 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13812,23 +13812,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z9gtid_testv() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13861,23 +13861,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn4v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !13 +// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13887,23 +13887,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn5v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13913,23 +13913,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP13:%.*]] = 
load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn6v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13964,23 +13964,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn1v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13990,23 +13990,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn2v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14016,23 +14016,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn3v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: 
omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14058,22 +14058,22 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14083,23 +14083,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK15-NEXT: call void @_Z9gtid_testv() +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw 
i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14133,23 +14133,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn4v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14193,23 +14193,23 @@ int main() { // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn6v() +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK15-NEXT: br label 
[[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14269,23 +14269,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn1v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14321,23 +14321,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn3v() +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14363,22 +14363,22 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14388,23 +14388,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], 
label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK16-NEXT: call void @_Z9gtid_testv() +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14438,23 +14438,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn4v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14498,23 +14498,23 @@ int main() { // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn6v() +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -14574,23 +14574,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn1v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14626,23 +14626,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn3v() +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index 287119f3f6a9..c8e95951a61f 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -2999,7 +2999,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 @@ -3048,7 +3048,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 // CHECK4-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i64 40) // CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 15379282e853..5c33af387296 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1457,9 +1457,9 @@ int bar(int n){ // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 0) // CHECK1-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS2]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP4]], i64 0) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1568,19 +1568,19 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 // CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP5]], i64 0) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV2]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 @@ -1632,7 +1632,7 @@ int bar(int n){ // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[A1:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[A_ON_STACK]], align 4 @@ -1849,11 +1849,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp index fdd3cd6db8cf..b0738928a013 100644 --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -472,7 +472,7 @@ int bar(int n){ // CHECK-NEXT: [[D:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to i32* // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-NEXT: store i32 [[TMP3]], i32* [[D_ON_STACK]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: [[TMP5:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 34fc9d94501f..c3a90d523110 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -215,16 +215,16 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -265,9 +265,9 @@ void unreachable_call() { // 
CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP10]] to double @@ -342,19 +342,19 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP5]], 1 @@ -387,7 +387,7 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -468,14 +468,14 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* 
[[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 1 @@ -602,16 +602,16 @@ void unreachable_call() { // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -730,16 +730,16 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK2-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 @@ -854,11 +854,11 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32 // 
CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 @@ -985,16 +985,16 @@ void unreachable_call() { // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1113,16 +1113,16 @@ void unreachable_call() { // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 @@ -1237,11 +1237,11 @@ void unreachable_call() { // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], 
i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index 5049500516f6..6e9385456492 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -108,7 +108,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 @@ -439,7 +439,7 @@ int bar(int n){ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK4-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c index 99b48cc792b2..b3e258d30d77 100644 --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -93,7 +93,7 @@ void CheckAllocaIsInEntryBlock() { // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK-64: if.then: diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index fbe62f191eec..53ca71dbcd9b 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -63,7 +63,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -86,7 +86,7 @@ int bar(int n){ 
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: store i8 49, i8* [[CONV]], align 8 +// CHECK1-NEXT: store i8 49, i8* [[CONV]], align 1 // CHECK1-NEXT: ret void // // @@ -104,7 +104,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -127,7 +127,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: store i16 1, i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 1, i16* [[CONV]], align 2 // CHECK1-NEXT: ret void // // @@ -145,7 +145,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -228,7 +228,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 @@ -251,7 +251,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK2-NEXT: store i8 49, i8* [[CONV]], align 4 +// CHECK2-NEXT: store i8 49, i8* [[CONV]], align 1 // CHECK2-NEXT: ret void // // @@ -269,7 +269,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -292,7 +292,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // 
CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: store i16 1, i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 1, i16* [[CONV]], align 2 // CHECK2-NEXT: ret void // // @@ -310,7 +310,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -393,7 +393,7 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 @@ -416,7 +416,7 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK3-NEXT: store i8 49, i8* [[CONV]], align 4 +// CHECK3-NEXT: store i8 49, i8* [[CONV]], align 1 // CHECK3-NEXT: ret void // // @@ -434,7 +434,7 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -457,7 +457,7 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: store i16 1, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 1, i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -475,7 +475,7 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index d0b57801530b..0cd88fb9b12c 
100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18488,11 +18488,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -18536,7 +18536,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* // CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -18583,11 +18583,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -18650,8 +18650,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -18690,7 +18690,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 
4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -18756,7 +18756,7 @@ int bar(int n){ // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18785,8 +18785,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -18811,7 +18811,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18849,7 +18849,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -18897,7 +18897,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18985,7 +18985,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -19257,7 +19257,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -19326,7 +19326,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -19445,7 +19445,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 10, i32* [[K]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 @@ -19490,7 +19490,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19532,9 +19532,9 @@ int bar(int n){ // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -19592,7 +19592,7 @@ int bar(int n){ // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: 
[[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV15:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV15]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19684,9 +19684,9 @@ int bar(int n){ // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -19809,7 +19809,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19850,7 +19850,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -19898,7 +19898,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19992,7 +19992,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20082,11 +20082,11 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // 
CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -20130,7 +20130,7 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* // CHECK2-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20177,11 +20177,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -20244,8 +20244,8 @@ int bar(int n){ // CHECK2-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK2-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK2-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -20284,7 +20284,7 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20350,7 +20350,7 @@ int bar(int n){ // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* 
[[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20379,8 +20379,8 @@ int bar(int n){ // CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -20405,7 +20405,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20443,7 +20443,7 @@ int bar(int n){ // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20491,7 +20491,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20579,7 +20579,7 @@ int bar(int n){ // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20851,7 +20851,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: 
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -20920,7 +20920,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -21039,7 +21039,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 10, i32* [[K]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 @@ -21084,7 +21084,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21126,9 +21126,9 @@ int bar(int n){ // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21185,7 +21185,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV12]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21277,9 +21277,9 @@ int bar(int n){ // CHECK2-NEXT: store [10 x [10 x 
i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21398,7 +21398,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21439,7 +21439,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -21487,7 +21487,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21581,7 +21581,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 98cfc7aa0db5..fd65a077c5d8 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -50,11 +50,11 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -96,7 +96,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -144,11 +144,11 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -242,7 +242,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -265,7 +265,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // 
CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -948,11 +948,11 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -994,7 +994,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1042,11 +1042,11 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV8:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1140,7 +1140,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1163,7 +1163,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 754f4ce08ca0..0339a6d21899 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9401,11 +9401,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -9449,7 +9449,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* // CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -9496,11 +9496,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12 @@ -9575,8 +9575,8 @@ int bar(int n){ // CHECK1-NEXT: 
[[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 // CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -9615,7 +9615,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -9681,7 +9681,7 @@ int bar(int n){ // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8, !llvm.access.group !16 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4, !llvm.access.group !16 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9722,8 +9722,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 // CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -9748,7 +9748,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9786,7 +9786,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -9834,7 +9834,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19 @@ -9934,7 +9934,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -10232,7 +10232,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -10301,7 +10301,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31 @@ -10428,7 +10428,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index cc7050a7cb07..52d94ab9b682 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -902,7 +902,7 @@ int main (int argc, char 
**argv) { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 @@ -1069,7 +1069,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK3-NEXT: [[ARGC3:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index d0a19bdb4396..f584b2b4676c 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4520,10 +4520,10 @@ int bar(int n){ // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i64 1) // CHECK1-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 // CHECK1-NEXT: [[D3:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 @@ -5839,7 +5839,7 @@ int bar(int n){ // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) // CHECK2-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 @@ -7157,7 +7157,7 @@ int bar(int n){ // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) // CHECK3-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp index 35eb8d05123e..a777faf5f03d 100644 --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ 
b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -2300,14 +2300,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK9-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK9-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -2379,7 +2379,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 1 to i8*)) // CHECK9-NEXT: [[DOTT_VAR__ADDR:%.*]] = bitcast i8* [[DOTT_VAR__VOID_ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTT_VAR__ADDR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTT_VAR__ADDR]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* @@ -2521,9 +2521,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 // CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK9-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 -// CHECK9-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 1 @@ -2999,14 +2999,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) 
[[AGG_TMP6]]) #[[ATTR4]] -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK10-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -3078,7 +3078,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK10-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 1 to i8*)) // CHECK10-NEXT: [[DOTT_VAR__ADDR:%.*]] = bitcast i8* [[DOTT_VAR__VOID_ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTT_VAR__ADDR]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTT_VAR__ADDR]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* @@ -3220,9 +3220,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 // CHECK10-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK10-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 -// CHECK10-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 1 @@ -3763,9 +3763,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK11-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK11-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK11-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 @@ -3791,7 +3791,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: 
store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK11-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK11-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK11-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 // CHECK11-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 @@ -3879,7 +3879,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK12-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK12-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK12-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 128 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 1 @@ -3894,7 +3894,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[G1]], align 128 // CHECK12-NEXT: store volatile i32 [[TMP2]], i32* [[BLOCK_CAPTURED]], align 128 // CHECK12-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[BLOCK_CAPTURED2]], align 32 // CHECK12-NEXT: [[TMP4:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]] to void ()* // CHECK12-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP4]] to %struct.__block_literal_generic* @@ -4029,7 +4029,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK12-NEXT: store i32* [[TMP5]], i32** [[BLOCK_CAPTURED]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK12-NEXT: store i32 [[TMP6]], i32* [[BLOCK_CAPTURED7]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 // CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 @@ -4119,9 +4119,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], 
align 4 // CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK12-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK12-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK12-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 @@ -4322,4 +4322,3 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP12]], i8* [[TMP22]], i8* inttoptr (i64 8 to i8*)) // CHECK17-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 3550e76f21c4..a4270c6493b2 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -298,7 +298,7 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -1304,7 +1304,7 @@ void range_for_collapsed() { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK1-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -1412,7 +1412,7 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -2418,7 +2418,7 @@ void range_for_collapsed() { // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK2-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK2-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -2526,7 +2526,7 @@ void range_for_collapsed() { // CHECK3-NEXT: store i64 [[TMP3]], i64* 
[[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -3532,7 +3532,7 @@ void range_for_collapsed() { // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK3-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -3640,7 +3640,7 @@ void range_for_collapsed() { // CHECK4-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -4646,7 +4646,7 @@ void range_for_collapsed() { // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK4-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK4-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK4-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -4697,7 +4697,7 @@ void range_for_collapsed() { // // // CHECK5-LABEL: define {{[^@]+}}@_Z17with_var_schedulev -// CHECK5-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG7:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -4754,7 +4754,7 @@ void range_for_collapsed() { // CHECK5-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1, !dbg [[DBG14]] // CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG14]] // CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG14]] @@ -5760,7 +5760,7 @@ void range_for_collapsed() { // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP14:%.*]] = load float, float* 
[[ARRAYIDX]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG111]] // CHECK5-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG111]] @@ -5868,7 +5868,7 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -6872,7 +6872,7 @@ void range_for_collapsed() { // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK6-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK6-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK6-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp index dc8343941a67..212fc8a6cc07 100644 --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -559,9 +559,9 @@ void parallel_master_allocate() { // CHECK13-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK13-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK13: omp_if.then: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK13-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK13-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] // CHECK13: omp_if.end: @@ -597,9 +597,9 @@ void parallel_master_allocate() { // CHECK14-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK14-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK14: omp_if.then: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK14-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK14-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] // CHECK14: omp_if.end: @@ -658,9 +658,9 @@ void parallel_master_allocate() { // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[B]], align 4 -// 
CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK17-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZN2St1yE, align 4 // CHECK17-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK17-NEXT: store i32 [[INC3]], i32* @_ZN2St1yE, align 4 @@ -754,9 +754,9 @@ void parallel_master_allocate() { // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK18-NEXT: store i32 [[ADD2]], i32* [[B]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK18-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZN2St1yE, align 4 // CHECK18-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK18-NEXT: store i32 [[INC3]], i32* @_ZN2St1yE, align 4 @@ -827,9 +827,9 @@ void parallel_master_allocate() { // CHECK21-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK21-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK21-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK21-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -865,9 +865,9 @@ void parallel_master_allocate() { // CHECK22-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK22-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK22: omp_if.then: -// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK22-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK22-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK22-NEXT: br label [[OMP_IF_END]] // CHECK22: omp_if.end: @@ -1005,4 +1005,3 @@ void parallel_master_allocate() { // CHECK29-SAME: () #[[ATTR4:[0-9]+]] comdat { // CHECK29-NEXT: ret i32* @a // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp index b0b1dfee0213..4fd6b958fd28 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -139,7 +139,7 @@ struct S { // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, 
%struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -258,7 +258,7 @@ struct S { // CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -392,7 +392,7 @@ struct S { // CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: store i8*** [[TMP1]], i8**** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -430,7 +430,7 @@ struct S { // CHECK1-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP23]], i8* align 8 [[TMP24]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1 // CHECK1-NEXT: [[TMP26:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 5 @@ -824,7 +824,7 @@ struct S { // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -1065,7 +1065,7 @@ struct S { // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, 
%struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -1184,7 +1184,7 @@ struct S { // CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -1318,7 +1318,7 @@ struct S { // CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK2-NEXT: store i8*** [[TMP1]], i8**** [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -1356,7 +1356,7 @@ struct S { // CHECK2-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 // CHECK2-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP23]], i8* align 8 [[TMP24]], i64 16, i1 false) -// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1 // CHECK2-NEXT: [[TMP26:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 5 @@ -1750,7 +1750,7 @@ struct S { // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK2-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp index 2b3752cf01cd..0c20823ba68f 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -141,7 +141,7 @@ struct S { // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], 
i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -260,7 +260,7 @@ struct S { // CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -399,7 +399,7 @@ struct S { // CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -438,7 +438,7 @@ struct S { // CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP25]], i8* align 8 [[TMP26]], i64 24, i1 false) // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1 // CHECK1-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 5 @@ -746,7 +746,7 @@ struct S { // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -986,7 +986,7 @@ struct S { // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: 
omp_if.then: // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -1105,7 +1105,7 @@ struct S { // CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -1244,7 +1244,7 @@ struct S { // CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK2-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -1283,7 +1283,7 @@ struct S { // CHECK2-NEXT: [[TMP26:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP25]], i8* align 8 [[TMP26]], i64 24, i1 false) // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1 // CHECK2-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 5 @@ -1571,7 +1571,7 @@ struct S { // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK2-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -1831,7 +1831,7 @@ struct S { 
// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -1950,7 +1950,7 @@ struct S { // CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -2090,11 +2090,11 @@ struct S { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -2133,7 +2133,7 @@ struct S { // CHECK3-NEXT: [[TMP28:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP27]], i8* align 8 [[TMP28]], i64 32, i1 false) // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP24]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP30]] to i1 // CHECK3-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL18]] to i32 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 5 @@ -2491,7 +2491,7 @@ struct S { // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: store 
i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK3-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -2731,7 +2731,7 @@ struct S { // CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -2850,7 +2850,7 @@ struct S { // CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -2990,11 +2990,11 @@ struct S { // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK4-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 // CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -3033,7 +3033,7 @@ struct S { // CHECK4-NEXT: [[TMP28:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP27]], i8* align 8 [[TMP28]], i64 32, i1 false) // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP24]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 8 +// 
CHECK4-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP30]] to i1 // CHECK4-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL18]] to i32 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 5 @@ -3391,7 +3391,7 @@ struct S { // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK4-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp index 4f7777cad610..7cb75113c13a 100644 --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -1031,9 +1031,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK1-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -1867,9 +1867,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK2-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -3049,9 +3049,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -3873,9 +3873,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[INC]], i32* 
[[TMP7]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -4170,129 +4170,129 @@ void array_func(int n, int a[n], St s[2]) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG9]] -// CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]), !dbg [[DBG10:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9]] -// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG9]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG10:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG10]] +// CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]), !dbg [[DBG11:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG10]] +// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG10]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev -// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG11:![0-9]+]] { +// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG12:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8 // CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]), !dbg [[DBG12:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG13:![0-9]+]] +// CHECK5-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]), !dbg [[DBG13:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG14:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_. 
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG14:![0-9]+]] { +// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG15:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG15:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG15]] -// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG15]] -// CHECK5-NEXT: ret void, !dbg [[DBG16:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG16:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG16]] +// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG16]] +// CHECK5-NEXT: ret void, !dbg [[DBG17:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev -// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG17:![0-9]+]] { +// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG18:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8 // CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG18:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG19:![0-9]+]] +// CHECK5-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG19:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG20:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_. 
-// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG20:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG21:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG21:![0-9]+]] -// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG21]] -// CHECK5-NEXT: ret void, !dbg [[DBG21]] +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG22]] +// CHECK5-NEXT: ret void, !dbg [[DBG22]] // // // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1 -// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG22:![0-9]+]] { +// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG23:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG23]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG24:![0-9]+]] -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG24]] -// CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG24:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG25:![0-9]+]] +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG25]] +// CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG25]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG25]] // CHECK5-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG25]] // CHECK5: invoke.cont: -// 
CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG24]] -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG24]] -// CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG25]] +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG25]] +// CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG25]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23]] -// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG23]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG24]] +// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG24]] // CHECK5: lpad: // CHECK5-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK5-NEXT: cleanup, !dbg [[DBG25:![0-9]+]] -// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG25]] -// CHECK5-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG25]] -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG24]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: cleanup, !dbg [[DBG26:![0-9]+]] +// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG26]] +// CHECK5-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG25]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG25]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG24]] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG24]] -// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG24]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG24]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG25]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG25]] +// CHECK5-NEXT: call void 
@_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG25]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG25]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG25]] // CHECK5: arraydestroy.done1: -// CHECK5-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG25]] // CHECK5: eh.resume: -// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG24]] -// CHECK5-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG24]] -// CHECK5-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG24]] -// CHECK5-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG24]] -// CHECK5-NEXT: resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG24]] +// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG25]] +// CHECK5-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG25]] +// CHECK5-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG25]] +// CHECK5-NEXT: resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG25]] // // // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2 -// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG26:![0-9]+]] { +// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG27:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG27:![0-9]+]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG27]] -// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG28:![0-9]+]] +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG28]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG28]] +// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG28]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG27]] -// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG27]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], 
[[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG28]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG28]] +// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG28]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG28]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG28]] // CHECK5: arraydestroy.done1: -// CHECK5-NEXT: ret void, !dbg [[DBG28:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG29:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3 -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG29:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG30:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG30:![0-9]+]] -// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG30]] -// CHECK5-NEXT: ret void, !dbg [[DBG30]] +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG31]] +// CHECK5-NEXT: ret void, !dbg [[DBG31]] // // // CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG31:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG32:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc), !dbg [[DBG33:![0-9]+]] // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG35:![0-9]+]] @@ -5053,9 +5053,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG168:![0-9]+]] // CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1, !dbg [[DBG168]] // CHECK5-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4, !dbg [[DBG168]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8, !dbg [[DBG169:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4, !dbg [[DBG169:![0-9]+]] // CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1, !dbg [[DBG169]] -// CHECK5-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8, !dbg [[DBG169]] +// CHECK5-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4, !dbg [[DBG169]] // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG167]] // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !dbg [[DBG170:![0-9]+]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1, !dbg 
[[DBG170]] @@ -5238,4 +5238,3 @@ void array_func(int n, int a[n], St s[2]) { // CHECK6-NEXT: store %struct.St* [[TMP19]], %struct.St** [[TMP15]], align 8 // CHECK6-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp index 371160354776..ef41980136b8 100644 --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -384,29 +384,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -697,29 +697,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] 
= load double, double* [[CONV1]], align 8 // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1029,29 +1029,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label 
[[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1326,29 +1326,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK2-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK2-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK2-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK2-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK2-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK2-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK2-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK2: land.lhs.true17: -// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK2-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1639,29 +1639,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // 
CHECK2-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK2-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK2-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK2-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK2-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK2-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK2-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK2: land.lhs.true17: -// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK2-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1971,29 +1971,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK2-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK2-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK2-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK2-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK2-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK2-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK2-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK2: 
land.lhs.true17: -// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK2-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -2262,11 +2262,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -2279,7 +2279,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -2569,11 +2569,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -2586,7 +2586,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -2895,11 +2895,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // 
CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -2912,7 +2912,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -3186,11 +3186,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK4-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK4-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -3203,7 +3203,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK4-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -3493,11 +3493,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK4-NEXT: [[ADD11:%.*]] = 
fadd double [[TMP8]], 1.000000e+00 // CHECK4-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -3510,7 +3510,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK4-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -3819,11 +3819,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK4-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK4-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -3836,7 +3836,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK4-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index 012198a5ee7f..bb478963849a 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -594,7 +594,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -612,9 +612,9 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -625,7 +625,7 @@ 
int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -643,11 +643,11 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -671,11 +671,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -696,14 +696,14 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: ret void // // @@ -738,7 +738,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** 
[[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -780,9 +780,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -1137,7 +1137,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1167,7 +1167,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1202,15 +1202,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ 
-1237,19 +1237,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -1271,11 +1271,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1299,14 +1299,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -1624,7 +1624,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1642,9 +1642,9 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1655,7 +1655,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1673,11 +1673,11 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -1701,11 +1701,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* 
[[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1726,14 +1726,14 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: ret void // // @@ -1768,7 +1768,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1810,9 +1810,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -2167,7 +2167,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2197,7 +2197,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // 
CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2232,15 +2232,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -2267,19 +2267,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -2301,11 +2301,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: 
[[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2329,14 +2329,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -2679,7 +2679,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -2697,11 +2697,11 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -2727,7 +2727,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -2750,11 
+2750,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -3246,11 +3246,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -3279,16 +3279,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -3312,7 +3312,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3338,11 +3338,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 
1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -3685,7 +3685,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3703,11 +3703,11 @@ int bar(int n){ // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -3733,7 +3733,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3756,11 +3756,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: ret void // // @@ -4252,11 +4252,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], 
i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -4285,16 +4285,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK4-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -4318,7 +4318,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4344,11 +4344,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -4387,7 +4387,7 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, 
align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4405,11 +4405,11 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -4433,11 +4433,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4458,14 +4458,14 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: ret void // // @@ -4500,7 +4500,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -4542,9 +4542,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -4599,15 +4599,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -4634,19 +4634,19 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // 
CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -4673,7 +4673,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4703,7 +4703,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4734,11 +4734,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4762,14 +4762,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, 
i64 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -4801,7 +4801,7 @@ int bar(int n){ // CHECK10-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4819,11 +4819,11 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK10-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -4847,11 +4847,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4872,14 +4872,14 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: ret void // // @@ -4914,7 +4914,7 @@ int bar(int n){ // 
CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -4956,9 +4956,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -5013,15 +5013,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -5048,19 +5048,19 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK10-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 
-// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5087,7 +5087,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5117,7 +5117,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5148,11 +5148,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5176,14 +5176,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext 
i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -5215,7 +5215,7 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5233,11 +5233,11 @@ int bar(int n){ // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -5263,7 +5263,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5286,11 +5286,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: ret void // // @@ -5423,11 +5423,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5456,16 +5456,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK11-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5552,7 +5552,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5578,11 +5578,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -5614,7 +5614,7 @@ int bar(int n){ // CHECK12-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to 
i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5632,11 +5632,11 @@ int bar(int n){ // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -5662,7 +5662,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5685,11 +5685,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: ret void // // @@ -5822,11 +5822,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5855,16 +5855,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK12-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK12-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5951,7 +5951,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5977,11 +5977,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -6292,7 +6292,7 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6310,9 +6310,9 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], 
align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -6323,7 +6323,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6341,11 +6341,11 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK17-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -6369,11 +6369,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6394,14 +6394,14 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* 
[[CONV1]], align 2 // CHECK17-NEXT: ret void // // @@ -6436,7 +6436,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6478,9 +6478,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -6835,7 +6835,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -6865,7 +6865,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -6900,15 +6900,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* 
[[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -6935,19 +6935,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -6969,11 +6969,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6997,14 +6997,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -7322,7 +7322,7 @@ int bar(int n){ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7340,9 +7340,9 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -7353,7 +7353,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7371,11 +7371,11 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK18-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -7399,11 +7399,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7424,14 +7424,14 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: ret void // // @@ -7466,7 +7466,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7508,9 +7508,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -7865,7 +7865,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -7895,7 
+7895,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7930,15 +7930,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -7965,19 +7965,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK18-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -7999,11 +7999,11 @@ int bar(int n){ // 
CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8027,14 +8027,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -8377,7 +8377,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8395,11 +8395,11 @@ int bar(int n){ // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP2]], i32 1) @@ -8425,7 +8425,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8448,11 +8448,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: ret void // // @@ -8944,11 +8944,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -8977,16 +8977,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK19-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -9010,7 +9010,7 @@ int bar(int n){ // CHECK19-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9036,11 +9036,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -9383,7 +9383,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9401,11 +9401,11 @@ int bar(int n){ // CHECK20-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK20-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -9431,7 +9431,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9454,11 +9454,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], 
align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: ret void // // @@ -9950,11 +9950,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9983,16 +9983,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK20-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10016,7 +10016,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10042,11 +10042,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 
[[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -10085,7 +10085,7 @@ int bar(int n){ // CHECK25-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10103,11 +10103,11 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK25-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -10131,11 +10131,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10156,14 +10156,14 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], 
align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: ret void // // @@ -10198,7 +10198,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10240,9 +10240,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -10297,15 +10297,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10332,19 +10332,19 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK25-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK25-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10371,7 +10371,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -10401,7 +10401,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10432,11 +10432,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10460,14 +10460,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: 
[[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -10499,7 +10499,7 @@ int bar(int n){ // CHECK26-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10517,11 +10517,11 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK26-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -10545,11 +10545,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10570,14 +10570,14 @@ int bar(int n){ // CHECK26-NEXT: store 
i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: ret void // // @@ -10612,7 +10612,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10654,9 +10654,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -10711,15 +10711,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK26-NEXT: 
[[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10746,19 +10746,19 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK26-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK26-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10785,7 +10785,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -10815,7 +10815,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10846,11 +10846,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// 
CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10874,14 +10874,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -10913,7 +10913,7 @@ int bar(int n){ // CHECK27-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10931,11 +10931,11 @@ int bar(int n){ // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -10961,7 +10961,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ 
-10984,11 +10984,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: ret void // // @@ -11121,11 +11121,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11154,16 +11154,16 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK27-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK27-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -11250,7 +11250,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11276,11 +11276,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -11312,7 +11312,7 @@ int bar(int n){ // CHECK28-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11330,11 +11330,11 @@ int bar(int n){ // CHECK28-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK28-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -11360,7 +11360,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11383,11 +11383,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: ret void // // @@ 
-11520,11 +11520,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11553,16 +11553,16 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK28-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK28-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -11649,7 +11649,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11675,11 +11675,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // 
CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 81d1b14750c8..f8cf931a431f 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -65,7 +65,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG24:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG23:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -246,7 +246,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG114]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG114]] @@ -256,7 +256,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4:[0-9]+]] !dbg [[DBG98:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG115:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -275,7 +275,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG123]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 
x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG123]] @@ -455,7 +455,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG191]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG191]] @@ -466,7 +466,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG168:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG192:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -485,7 +485,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG198]] @@ -687,7 +687,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG232:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG270:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp 
b/clang/test/OpenMP/target_parallel_for_codegen.cpp index 0fc92b4262ce..6ee0d4a6a896 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -735,7 +735,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -802,9 +802,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -844,15 +844,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -888,9 +888,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -942,11 +942,11 @@ int bar(int n){ // CHECK1-NEXT: [[ADD12:%.*]] 
= add i64 [[CONV10]], [[MUL11]] // CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -964,9 +964,9 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -1088,11 +1088,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1152,14 +1152,14 @@ int bar(int n){ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1210,11 +1210,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, 
%struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1270,7 +1270,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -1306,9 +1306,9 @@ int bar(int n){ // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -1685,7 +1685,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1753,7 +1753,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1800,15 +1800,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* 
[[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1854,11 +1854,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1920,14 +1920,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2373,7 +2373,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] 
to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2440,9 +2440,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2482,15 +2482,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2526,9 +2526,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK2-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -2580,11 +2580,11 @@ int bar(int n){ // CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK2-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK2-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // 
CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2602,9 +2602,9 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK2: .omp.linear.pu: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK2: .omp.linear.pu.done: // CHECK2-NEXT: ret void @@ -2726,11 +2726,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2790,14 +2790,14 @@ int bar(int n){ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK2-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2848,11 +2848,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: 
[[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2908,7 +2908,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -2944,9 +2944,9 @@ int bar(int n){ // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK2-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -3323,7 +3323,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3391,7 +3391,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK2-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3438,15 +3438,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = 
bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3492,11 +3492,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3558,14 +3558,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -4103,7 +4103,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4197,11 +4197,11 @@ int bar(int n){ // CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add 
nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4220,8 +4220,8 @@ int bar(int n){ // CHECK3: .omp.linear.pu: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -4343,7 +4343,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4405,11 +4405,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5040,11 +5040,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5091,7 +5091,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // 
CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5155,11 +5155,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5697,7 +5697,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5791,11 +5791,11 @@ int bar(int n){ // CHECK4-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK4-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5814,8 +5814,8 @@ int bar(int n){ // CHECK4: .omp.linear.pu: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK4-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: // CHECK4-NEXT: ret void @@ -5937,7 +5937,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5999,11 +5999,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], 
align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK4-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6634,11 +6634,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -6685,7 +6685,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6749,11 +6749,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -6879,15 +6879,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* 
[[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6923,9 +6923,9 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK9-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK9-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -6977,11 +6977,11 @@ int bar(int n){ // CHECK9-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK9-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK9-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6999,9 +6999,9 @@ int bar(int n){ // CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: // CHECK9-NEXT: ret void @@ -7024,11 +7024,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, 
i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7088,14 +7088,14 @@ int bar(int n){ // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK9-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK9-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK9-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK9-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7146,11 +7146,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -7206,7 +7206,7 @@ int bar(int n){ // CHECK9-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -7242,9 +7242,9 @@ int bar(int n){ // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK9-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -7321,15 +7321,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -7380,7 +7380,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -7448,7 +7448,7 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7491,11 +7491,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7557,14 +7557,14 @@ int bar(int n){ // CHECK9-NEXT: 
[[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7683,15 +7683,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7727,9 +7727,9 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK10-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK10-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -7781,11 +7781,11 @@ int bar(int n){ // CHECK10-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK10-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK10-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 
// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK10-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7803,9 +7803,9 @@ int bar(int n){ // CHECK10-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK10: .omp.linear.pu: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK10-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK10: .omp.linear.pu.done: // CHECK10-NEXT: ret void @@ -7828,11 +7828,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7892,14 +7892,14 @@ int bar(int n){ // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK10-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK10-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK10-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK10-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7950,11 +7950,11 @@ int bar(int n){ // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* 
[[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8010,7 +8010,7 @@ int bar(int n){ // CHECK10-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -8046,9 +8046,9 @@ int bar(int n){ // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK10-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -8125,15 +8125,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8184,7 +8184,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], 
align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -8252,7 +8252,7 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK10-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK10-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8295,11 +8295,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8361,14 +8361,14 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK10-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK10-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -8485,7 +8485,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8579,11 +8579,11 @@ int bar(int n){ // 
CHECK11-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK11-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK11-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK11-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8602,8 +8602,8 @@ int bar(int n){ // CHECK11: .omp.linear.pu: // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK11-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: // CHECK11-NEXT: ret void @@ -8628,7 +8628,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8690,11 +8690,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK11-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8916,11 +8916,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[AAA_CASTED]], align 4 @@ -9080,7 +9080,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9144,11 +9144,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -9265,7 +9265,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9359,11 +9359,11 @@ int bar(int n){ // CHECK12-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK12-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK12-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK12-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9382,8 +9382,8 @@ int bar(int n){ // CHECK12: .omp.linear.pu: // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK12-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK12-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK12: .omp.linear.pu.done: // CHECK12-NEXT: ret void @@ -9408,7 +9408,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* 
[[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9470,11 +9470,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK12-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK12-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK12-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9696,11 +9696,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9860,7 +9860,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9924,11 +9924,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK12-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK12-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw 
i32 [[TMP11]], 1 @@ -10367,7 +10367,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10434,9 +10434,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10476,15 +10476,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10520,9 +10520,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK17-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -10574,11 +10574,11 @@ int bar(int n){ // CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK17-NEXT: store 
i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10596,9 +10596,9 @@ int bar(int n){ // CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -10720,11 +10720,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10784,14 +10784,14 @@ int bar(int n){ // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10842,11 +10842,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: 
[[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10902,7 +10902,7 @@ int bar(int n){ // CHECK17-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -10938,9 +10938,9 @@ int bar(int n){ // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -11317,7 +11317,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -11385,7 +11385,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -11432,15 +11432,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* 
[[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -11486,11 +11486,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -11552,14 +11552,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12005,7 +12005,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 
// CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -12072,9 +12072,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK18-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12114,15 +12114,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -12158,9 +12158,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK18-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK18-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -12212,11 +12212,11 @@ int bar(int n){ // CHECK18-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK18-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK18-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: 
store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12234,9 +12234,9 @@ int bar(int n){ // CHECK18-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK18: .omp.linear.pu: // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK18-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK18: .omp.linear.pu.done: // CHECK18-NEXT: ret void @@ -12358,11 +12358,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12422,14 +12422,14 @@ int bar(int n){ // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK18-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK18-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK18-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12480,11 +12480,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* 
[[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12540,7 +12540,7 @@ int bar(int n){ // CHECK18-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -12576,9 +12576,9 @@ int bar(int n){ // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK18-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -12955,7 +12955,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -13023,7 +13023,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK18-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -13070,15 +13070,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // 
CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -13124,11 +13124,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13190,14 +13190,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK18-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -13735,7 +13735,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13829,11 +13829,11 @@ int bar(int n){ // CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 
// CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13852,8 +13852,8 @@ int bar(int n){ // CHECK19: .omp.linear.pu: // CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK19-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -13975,7 +13975,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14037,11 +14037,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -14672,11 +14672,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -14723,7 +14723,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14787,11 +14787,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -15329,7 +15329,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15423,11 +15423,11 @@ int bar(int n){ // CHECK20-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK20-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK20-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -15446,8 +15446,8 @@ int bar(int n){ // CHECK20: .omp.linear.pu: // CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK20-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK20-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK20-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK20: .omp.linear.pu.done: // CHECK20-NEXT: ret void @@ -15569,7 +15569,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// 
CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15631,11 +15631,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK20-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16266,11 +16266,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -16317,7 +16317,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16381,11 +16381,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -16511,15 +16511,15 @@ int bar(int n){ // CHECK25-NEXT: 
[[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -16555,9 +16555,9 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK25-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK25-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK25-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -16609,11 +16609,11 @@ int bar(int n){ // CHECK25-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK25-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK25-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK25-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK25-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16631,9 +16631,9 @@ int bar(int n){ // CHECK25-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK25: .omp.linear.pu: // CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK25-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK25-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK25-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK25-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK25-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK25: .omp.linear.pu.done: // CHECK25-NEXT: ret void @@ -16656,11 +16656,11 @@ int bar(int n){ // 
CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -16720,14 +16720,14 @@ int bar(int n){ // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK25-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK25-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK25-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK25-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16778,11 +16778,11 @@ int bar(int n){ // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -16838,7 +16838,7 @@ int bar(int n){ // CHECK25-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], 
i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -16874,9 +16874,9 @@ int bar(int n){ // CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK25-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK25-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -16953,15 +16953,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17012,7 +17012,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17080,7 +17080,7 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK25-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK25-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -17123,11 +17123,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, 
i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17189,14 +17189,14 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK25-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK25-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -17315,15 +17315,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -17359,9 +17359,9 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 
+// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK26-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK26-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK26-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -17413,11 +17413,11 @@ int bar(int n){ // CHECK26-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK26-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK26-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK26-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK26-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK26-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17435,9 +17435,9 @@ int bar(int n){ // CHECK26-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK26: .omp.linear.pu: // CHECK26-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK26-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK26-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK26-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK26-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK26-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK26-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK26: .omp.linear.pu.done: // CHECK26-NEXT: ret void @@ -17460,11 +17460,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17524,14 +17524,14 @@ int bar(int n){ // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK26-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK26-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK26-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK26-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// 
CHECK26-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17582,11 +17582,11 @@ int bar(int n){ // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK26-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -17642,7 +17642,7 @@ int bar(int n){ // CHECK26-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK26-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK26-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK26-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -17678,9 +17678,9 @@ int bar(int n){ // CHECK26-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK26-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK26-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -17757,15 +17757,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* 
[[CONV4]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17816,7 +17816,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17884,7 +17884,7 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK26-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK26-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -17927,11 +17927,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17993,14 +17993,14 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK26-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK26-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // 
CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18117,7 +18117,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18211,11 +18211,11 @@ int bar(int n){ // CHECK27-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK27-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK27-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK27-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK27-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18234,8 +18234,8 @@ int bar(int n){ // CHECK27: .omp.linear.pu: // CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK27-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK27-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK27-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK27: .omp.linear.pu.done: // CHECK27-NEXT: ret void @@ -18260,7 +18260,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18322,11 +18322,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK27-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK27-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK27-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK27-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18548,11 +18548,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 
[[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -18712,7 +18712,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18776,11 +18776,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK27-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18897,7 +18897,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18991,11 +18991,11 @@ int bar(int n){ // CHECK28-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK28-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK28-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK28-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK28-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK28-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] @@ -19014,8 +19014,8 @@ int bar(int n){ // CHECK28: .omp.linear.pu: // CHECK28-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK28-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK28-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK28-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK28-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK28-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK28: .omp.linear.pu.done: // CHECK28-NEXT: ret void @@ -19040,7 +19040,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19102,11 +19102,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK28-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK28-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK28-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK28-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19328,11 +19328,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -19492,7 +19492,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19556,11 +19556,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // 
CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK28-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK28-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp index 87d1dfdd170f..b5a704c8f6d2 100644 --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -55,7 +55,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]], i1 zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG14:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]], i1 zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -316,7 +316,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG122]] @@ -326,7 +326,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] !dbg [[DBG106:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: 
[[A_ADDR:%.*]] = alloca i64, align 8 @@ -349,10 +349,10 @@ int main() { // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG132]] // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1, !dbg [[DBG132]] // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG132]] @@ -604,7 +604,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG213]] @@ -615,7 +615,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG190:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -634,7 +634,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** 
[[BB_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG222]] @@ -909,7 +909,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG269:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index d2529bb85026..475217fdc2c6 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -715,7 +715,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -782,9 +782,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -831,15 +831,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK1-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -875,9 +875,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -929,11 +929,11 @@ int bar(int n){ // CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -958,9 +958,9 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -977,11 +977,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1041,14 +1041,14 @@ int bar(int n){ // CHECK1-NEXT: 
[[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1106,11 +1106,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1166,7 +1166,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -1202,9 +1202,9 @@ int bar(int n){ // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -1588,7 +1588,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1656,7 +1656,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1710,15 +1710,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1764,11 +1764,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load 
i64, i64* [[AA_CASTED]], align 8 @@ -1830,14 +1830,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2293,7 +2293,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2360,9 +2360,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2409,15 +2409,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], 
i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2453,9 +2453,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK2-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -2507,11 +2507,11 @@ int bar(int n){ // CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK2-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK2-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2536,9 +2536,9 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK2: .omp.linear.pu: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK2: .omp.linear.pu.done: // CHECK2-NEXT: ret void @@ -2555,11 +2555,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // 
CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2619,14 +2619,14 @@ int bar(int n){ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK2-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2684,11 +2684,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2744,7 +2744,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -2780,9 +2780,9 @@ int bar(int n){ // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK2-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, 
!llvm.access.group !35 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -3166,7 +3166,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3234,7 +3234,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK2-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3288,15 +3288,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3342,11 +3342,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast 
i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3408,14 +3408,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -3970,7 +3970,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4064,11 +4064,11 @@ int bar(int n){ // CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4114,7 +4114,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[AA_CASTED]], align 4 @@ -4176,11 +4176,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4832,11 +4832,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -4883,7 +4883,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4947,11 +4947,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5506,7 +5506,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK4-NEXT: 
store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5600,11 +5600,11 @@ int bar(int n){ // CHECK4-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK4-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5650,7 +5650,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5712,11 +5712,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK4-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6368,11 +6368,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* 
[[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -6419,7 +6419,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6483,11 +6483,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -6943,7 +6943,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7010,9 +7010,9 @@ int bar(int n){ // CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK5-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7059,15 +7059,15 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP0]], i16* [[CONV3]], 
align 2 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7103,9 +7103,9 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK5-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -7157,11 +7157,11 @@ int bar(int n){ // CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK5-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK5-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7186,9 +7186,9 @@ int bar(int n){ // CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK5-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -7205,11 +7205,11 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK5-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7269,14 +7269,14 @@ int bar(int n){ // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK5-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK5-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK5-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7334,11 +7334,11 @@ int bar(int n){ // CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -7394,7 +7394,7 @@ int bar(int n){ // CHECK5-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -7430,9 +7430,9 @@ int bar(int n){ // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK5-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK5-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, 
!llvm.access.group !35 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -7848,17 +7848,17 @@ int bar(int n){ // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -7911,7 +7911,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -7942,7 +7942,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7994,7 +7994,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK5-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK5-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: 
[[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK5-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK5-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8052,15 +8052,15 @@ int bar(int n){ // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK5-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8106,11 +8106,11 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8172,14 +8172,14 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK5-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -8635,7 +8635,7 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8702,9 +8702,9 @@ int bar(int n){ // CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK6-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8751,15 +8751,15 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8795,9 +8795,9 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK6-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK6-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -8849,11 +8849,11 @@ int bar(int n){ // CHECK6-NEXT: 
[[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK6-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK6-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK6-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK6-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8878,9 +8878,9 @@ int bar(int n){ // CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK6: .omp.linear.pu: // CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK6-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK6-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK6: .omp.linear.pu.done: // CHECK6-NEXT: ret void @@ -8897,11 +8897,11 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8961,14 +8961,14 @@ int bar(int n){ // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK6-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK6-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK6-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK6-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK6-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK6-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9026,11 +9026,11 @@ int bar(int n){ // CHECK6-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK6-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9086,7 +9086,7 @@ int bar(int n){ // CHECK6-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -9122,9 +9122,9 @@ int bar(int n){ // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK6-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK6-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -9540,17 +9540,17 @@ int bar(int n){ // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK6-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: 
[[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK6-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -9603,7 +9603,7 @@ int bar(int n){ // CHECK6-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -9634,7 +9634,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK6-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK6-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK6-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -9686,7 +9686,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK6-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK6-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK6-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK6-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -9744,15 +9744,15 @@ int bar(int n){ // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK6-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -9798,11 +9798,11 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // 
CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9864,14 +9864,14 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK6-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -10426,7 +10426,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10520,11 +10520,11 @@ int bar(int n){ // CHECK7-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK7-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK7-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK7-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10570,7 +10570,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10632,11 +10632,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK7-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK7-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11201,13 +11201,13 @@ int bar(int n){ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: @@ -11259,7 +11259,7 @@ int bar(int n){ // CHECK7-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: @@ -11402,11 +11402,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // 
CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK7-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11453,7 +11453,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11517,11 +11517,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12076,7 +12076,7 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12170,11 +12170,11 @@ int bar(int n){ // CHECK8-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK8-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK8-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK8-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK8-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK8-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12220,7 +12220,7 @@ int bar(int 
n){ // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12282,11 +12282,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK8-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK8-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK8-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK8-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK8-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12851,13 +12851,13 @@ int bar(int n){ // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK8-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK8-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK8-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK8: omp_if.then: @@ -12909,7 +12909,7 @@ int bar(int n){ // CHECK8-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK8-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK8-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK8: omp_if.then: @@ -13052,11 +13052,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK8-NEXT: 
[[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK8-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -13103,7 +13103,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13167,11 +13167,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -17302,15 +17302,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -17346,9 +17346,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 
[[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK17-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -17400,11 +17400,11 @@ int bar(int n){ // CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17429,9 +17429,9 @@ int bar(int n){ // CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -17454,11 +17454,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17518,14 +17518,14 @@ int bar(int n){ // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: store i32 
[[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17583,11 +17583,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -17643,7 +17643,7 @@ int bar(int n){ // CHECK17-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -17679,9 +17679,9 @@ int bar(int n){ // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -17765,15 +17765,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], 
align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17824,7 +17824,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17892,7 +17892,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -17942,11 +17942,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18008,14 +18008,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// 
CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18131,15 +18131,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -18175,9 +18175,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK18-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK18-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -18229,11 +18229,11 @@ int bar(int n){ // CHECK18-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK18-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK18-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: 
[[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18258,9 +18258,9 @@ int bar(int n){ // CHECK18-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK18: .omp.linear.pu: // CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK18-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK18-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK18-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK18: .omp.linear.pu.done: // CHECK18-NEXT: ret void @@ -18283,11 +18283,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18347,14 +18347,14 @@ int bar(int n){ // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK18-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK18-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK18-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK18-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18412,11 +18412,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18472,7 +18472,7 @@ int bar(int n){ // CHECK18-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -18508,9 +18508,9 @@ int bar(int n){ // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK18-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -18594,15 +18594,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -18653,7 +18653,7 @@ int 
bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -18721,7 +18721,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK18-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -18771,11 +18771,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18837,14 +18837,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK18-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !29 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18958,7 
+18958,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19052,11 +19052,11 @@ int bar(int n){ // CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19108,7 +19108,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19170,11 +19170,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19410,11 +19410,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// 
CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -19581,7 +19581,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19645,11 +19645,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -19763,7 +19763,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19857,11 +19857,11 @@ int bar(int n){ // CHECK20-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK20-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK20-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19913,7 +19913,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // 
CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19975,11 +19975,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK20-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK20-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20215,11 +20215,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -20386,7 +20386,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20450,11 +20450,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], 
align 4, !llvm.access.group !30 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -20570,15 +20570,15 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -20614,9 +20614,9 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK21-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK21-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK21-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -20668,11 +20668,11 @@ int bar(int n){ // CHECK21-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK21-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK21-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20697,9 +20697,9 @@ int bar(int n){ // CHECK21-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK21: .omp.linear.pu: // CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], 
align 4 -// CHECK21-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK21-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK21-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK21-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK21-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK21: .omp.linear.pu.done: // CHECK21-NEXT: ret void @@ -20722,11 +20722,11 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -20786,14 +20786,14 @@ int bar(int n){ // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK21-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK21-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK21-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20851,11 +20851,11 @@ int bar(int n){ // CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK21-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK21-NEXT: 
[[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20911,7 +20911,7 @@ int bar(int n){ // CHECK21-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -20947,9 +20947,9 @@ int bar(int n){ // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK21-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK21-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -21033,15 +21033,15 @@ int bar(int n){ // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK21-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -21099,17 +21099,17 @@ int bar(int n){ // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// 
CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -21162,7 +21162,7 @@ int bar(int n){ // CHECK21-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -21193,7 +21193,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -21245,7 +21245,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK21-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK21-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK21-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK21-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -21299,11 +21299,11 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -21365,14 +21365,14 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // 
CHECK21-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK21-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -21488,15 +21488,15 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -21532,9 +21532,9 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK22-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK22-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK22-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -21586,11 +21586,11 @@ int bar(int n){ // CHECK22-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK22-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK22-NEXT: store i32 
[[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK22-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK22-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK22-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21615,9 +21615,9 @@ int bar(int n){ // CHECK22-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK22: .omp.linear.pu: // CHECK22-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK22-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK22-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK22-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK22-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK22-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK22: .omp.linear.pu.done: // CHECK22-NEXT: ret void @@ -21640,11 +21640,11 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -21704,14 +21704,14 @@ int bar(int n){ // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK22-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK22-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK22-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK22-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK22-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK22-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK22-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK22-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK22-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // 
CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21769,11 +21769,11 @@ int bar(int n){ // CHECK22-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK22-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK22-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK22-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK22-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21829,7 +21829,7 @@ int bar(int n){ // CHECK22-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK22-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK22-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK22-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -21865,9 +21865,9 @@ int bar(int n){ // CHECK22-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK22-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK22-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK22-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -21951,15 +21951,15 @@ int bar(int n){ // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* 
[[AA_CASTED]], align 8 -// CHECK22-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK22-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -22017,17 +22017,17 @@ int bar(int n){ // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK22-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK22-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK22-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -22080,7 +22080,7 @@ int bar(int n){ // CHECK22-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK22-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK22-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK22-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -22111,7 +22111,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK22-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK22-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK22-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK22-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22163,7 +22163,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK22-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK22-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK22-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK22-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK22-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22217,11 +22217,11 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = 
bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22283,14 +22283,14 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK22-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -22404,7 +22404,7 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -22498,11 +22498,11 @@ int bar(int n){ // CHECK23-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK23-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK23-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: store i16 
[[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22554,7 +22554,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -22616,11 +22616,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK23-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK23-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK23-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22856,11 +22856,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK23-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -22919,13 +22919,13 @@ int bar(int n){ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL4]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -22977,7 +22977,7 @@ int bar(int n){ // CHECK23-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -23116,7 +23116,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -23180,11 +23180,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -23298,7 +23298,7 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK24-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK24-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -23392,11 +23392,11 @@ int bar(int n){ // CHECK24-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK24-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK24-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK24-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK24-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK24-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK24-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, 
!llvm.access.group !18 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23448,7 +23448,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -23510,11 +23510,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK24-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK24-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK24-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK24-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK24-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23750,11 +23750,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK24-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK24-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -23813,13 +23813,13 @@ int bar(int n){ // CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK24-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK24-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -23871,7 +23871,7 @@ int bar(int n){ // CHECK24-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK24-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK24-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK24-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -24010,7 +24010,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24074,11 +24074,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp index 68590aeffcb5..750310cd908a 100644 --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -498,11 +498,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -532,7 +532,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] 
= bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -552,7 +552,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -593,7 +593,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -647,7 +647,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -669,9 +669,9 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -686,11 +686,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = 
load i64, i64* [[B_CASTED]], align 8 @@ -711,11 +711,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -1042,11 +1042,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -1076,7 +1076,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1096,7 +1096,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -1137,7 +1137,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -1191,7 +1191,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1213,9 +1213,9 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1230,11 +1230,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1255,11 +1255,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1585,7 +1585,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -1634,7 +1634,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP2:%.*]] = 
load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -1675,7 +1675,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -1767,7 +1767,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1787,7 +1787,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2117,7 +2117,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -2166,7 +2166,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -2207,7 +2207,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK4-NEXT: 
br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -2299,7 +2299,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2319,7 +2319,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2343,7 +2343,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -2403,11 +2403,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -2437,7 +2437,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2457,7 +2457,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* 
[[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -2499,7 +2499,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2521,9 +2521,9 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2538,11 +2538,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2563,11 +2563,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2580,7 +2580,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // 
CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -2640,11 +2640,11 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -2674,7 +2674,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2694,7 +2694,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -2736,7 +2736,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2758,9 +2758,9 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2775,11 +2775,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // 
CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2800,11 +2800,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2817,7 +2817,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -2879,7 +2879,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -2928,7 +2928,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -3008,7 +3008,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: 
[[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -3028,7 +3028,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -3045,7 +3045,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -3107,7 +3107,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -3156,7 +3156,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -3236,7 +3236,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -3256,7 +3256,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] 
to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -3580,11 +3580,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK17-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK17: omp_if.then: @@ -3614,7 +3614,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3634,7 +3634,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK17-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK17: omp_if.then: @@ -3675,7 +3675,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK17-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK17-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK17: omp_if.then: @@ -3729,7 +3729,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -3751,9 +3751,9 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // 
CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3768,11 +3768,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3793,11 +3793,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -4124,11 +4124,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK18-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK18-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK18: omp_if.then: @@ -4158,7 +4158,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4178,7 +4178,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK18-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK18-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK18-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK18: omp_if.then: @@ -4219,7 +4219,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK18-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK18-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK18-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK18: omp_if.then: @@ -4273,7 +4273,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -4295,9 +4295,9 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4312,11 +4312,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4337,11 +4337,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* 
-// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4667,7 +4667,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK19-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK19: omp_if.then: @@ -4716,7 +4716,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK19-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK19: omp_if.then: @@ -4757,7 +4757,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK19-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK19-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK19: omp_if.then: @@ -4849,7 +4849,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4869,7 +4869,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5199,7 +5199,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // 
CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK20-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK20-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK20-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK20: omp_if.then: @@ -5248,7 +5248,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK20-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK20-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK20-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK20: omp_if.then: @@ -5289,7 +5289,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK20-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK20-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK20-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK20: omp_if.then: @@ -5381,7 +5381,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5401,7 +5401,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5425,7 +5425,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK25-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK25-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK25-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK25: omp_if.then: @@ -5485,11 +5485,11 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// 
CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK25-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK25-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK25: omp_if.then: @@ -5519,7 +5519,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5539,7 +5539,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK25-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK25-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK25-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK25: omp_if.then: @@ -5581,7 +5581,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -5603,9 +5603,9 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -5620,11 +5620,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], 
align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5645,11 +5645,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -5662,7 +5662,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK26-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK26-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK26-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK26: omp_if.then: @@ -5722,11 +5722,11 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK26-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK26-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK26: omp_if.then: @@ -5756,7 +5756,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5776,7 +5776,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK26-NEXT: [[TMP2:%.*]] = 
load i8, i8* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK26-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK26-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK26: omp_if.then: @@ -5818,7 +5818,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -5840,9 +5840,9 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5857,11 +5857,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5882,11 +5882,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5899,7 +5899,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK27-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK27-NEXT: 
[[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK27-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK27: omp_if.then: @@ -5961,7 +5961,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK27-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK27-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK27: omp_if.then: @@ -6010,7 +6010,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK27-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK27-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK27-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK27: omp_if.then: @@ -6090,7 +6090,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -6110,7 +6110,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -6127,7 +6127,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK28-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK28-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK28-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK28: omp_if.then: @@ -6189,7 +6189,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK28-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK28-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK28-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK28: omp_if.then: @@ -6238,7 +6238,7 @@ 
int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK28-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK28-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK28-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK28: omp_if.then: @@ -6318,7 +6318,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -6338,7 +6338,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp index ee15b339271d..2c636c9187a1 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -465,9 +465,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -488,7 +488,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -530,7 +530,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: 
store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -553,7 +553,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -603,14 +603,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -631,11 +631,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -922,9 +922,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* 
[[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -945,7 +945,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -987,7 +987,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1010,7 +1010,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1060,14 +1060,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1088,11 +1088,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1505,13 +1505,13 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1531,7 +1531,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], 
align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1948,13 +1948,13 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1974,7 +1974,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1996,7 +1996,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2019,7 +2019,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2049,9 +2049,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2072,7 +2072,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2141,14 +2141,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2169,11 +2169,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void 
// // @@ -2184,7 +2184,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2207,7 +2207,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2237,9 +2237,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2260,7 +2260,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2329,14 +2329,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: call void 
@__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2357,11 +2357,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2510,13 +2510,13 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2536,7 +2536,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2689,13 +2689,13 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], 
align 2 // CHECK12-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2715,7 +2715,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2999,9 +2999,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3022,7 +3022,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3064,7 +3064,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3087,7 +3087,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3137,14 +3137,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3165,11 +3165,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3456,9 +3456,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3479,7 +3479,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3521,7 +3521,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3544,7 +3544,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3594,14 +3594,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3622,11 +3622,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4039,13 +4039,13 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4065,7 +4065,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// 
CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4482,13 +4482,13 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4508,7 +4508,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4530,7 +4530,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4553,7 +4553,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4583,9 +4583,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4606,7 +4606,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4675,14 +4675,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4703,11 +4703,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], 
i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -4718,7 +4718,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4741,7 +4741,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4771,9 +4771,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4794,7 +4794,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4863,14 +4863,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] 
to i32 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4891,11 +4891,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5044,13 +5044,13 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5070,7 +5070,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5223,13 +5223,13 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK28-NEXT: 
[[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5249,7 +5249,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 387be05f7bc0..7b3a3d825154 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -684,10 +684,10 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -822,7 +822,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -840,9 +840,9 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* 
[[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -853,7 +853,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -871,11 +871,11 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK1-NEXT: ret void // // @@ -890,11 +890,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -915,14 +915,14 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: ret void // // @@ -957,7 +957,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -999,9 +999,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -1045,7 +1045,7 @@ int bar(int n){ // CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -1064,7 +1064,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -1092,7 +1092,7 @@ int bar(int n){ // CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -1501,7 +1501,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1531,7 +1531,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1566,15 +1566,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1601,19 +1601,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -1635,11 +1635,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast 
i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1663,14 +1663,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -2067,10 +2067,10 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2205,7 +2205,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2223,9 +2223,9 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -2236,7 +2236,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2254,11 +2254,11 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK2-NEXT: ret void // // @@ -2273,11 +2273,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2298,14 +2298,14 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: ret void // // @@ -2340,7 +2340,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load 
%struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2382,9 +2382,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -2428,7 +2428,7 @@ int bar(int n){ // CHECK2-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -2447,7 +2447,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -2475,7 +2475,7 @@ int bar(int n){ // CHECK2-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -2884,7 +2884,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2914,7 +2914,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, 
i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2949,15 +2949,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -2984,19 +2984,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -3018,11 +3018,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], 
align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3046,14 +3046,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -3444,7 +3444,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3605,7 +3605,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3623,11 +3623,11 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // 
CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -3644,7 +3644,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3667,11 +3667,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -4299,11 +4299,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -4332,16 +4332,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], 
align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -4365,7 +4365,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4391,11 +4391,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -4786,7 +4786,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4947,7 +4947,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4965,11 +4965,11 @@ int bar(int n){ // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK4-NEXT: ret void // // @@ -4986,7 +4986,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5009,11 +5009,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: ret void // // @@ -5641,11 +5641,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5674,16 +5674,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK4-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5707,7 +5707,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// 
CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5733,11 +5733,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -5766,10 +5766,10 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5797,7 +5797,7 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5815,11 +5815,11 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK9-NEXT: ret void // // @@ -5834,11 +5834,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5859,14 +5859,14 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: ret void // // @@ -5901,7 +5901,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -5943,9 +5943,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -5989,7 +5989,7 @@ int bar(int n){ // CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6008,7 +6008,7 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6036,7 +6036,7 @@ int bar(int n){ // CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6113,15 +6113,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -6148,19 +6148,19 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] 
to i32 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -6187,7 +6187,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -6217,7 +6217,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -6248,11 +6248,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6276,14 +6276,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -6305,10 +6305,10 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6336,7 +6336,7 @@ int bar(int n){ // CHECK10-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6354,11 +6354,11 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK10-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK10-NEXT: ret void // // @@ -6373,11 +6373,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6398,14 +6398,14 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: ret void // // @@ -6440,7 +6440,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6482,9 +6482,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -6528,7 +6528,7 @@ int bar(int n){ // CHECK10-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6547,7 +6547,7 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6575,7 +6575,7 @@ int bar(int n){ // CHECK10-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] 
to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6652,15 +6652,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -6687,19 +6687,19 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK10-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -6726,7 +6726,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// 
CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -6756,7 +6756,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -6787,11 +6787,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6815,14 +6815,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -6845,7 +6845,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: 
store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6873,7 +6873,7 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6891,11 +6891,11 @@ int bar(int n){ // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK11-NEXT: ret void // // @@ -6912,7 +6912,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6935,11 +6935,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: ret void // // @@ -7177,11 +7177,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -7210,16 +7210,16 @@ 
int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK11-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -7306,7 +7306,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7332,11 +7332,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -7359,7 +7359,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7387,7 +7387,7 @@ int bar(int n){ // CHECK12-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // 
CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7405,11 +7405,11 @@ int bar(int n){ // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK12-NEXT: ret void // // @@ -7426,7 +7426,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7449,11 +7449,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: ret void // // @@ -7691,11 +7691,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -7724,16 +7724,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// 
CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK12-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK12-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -7820,7 +7820,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7846,11 +7846,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -8240,10 +8240,10 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8378,7 +8378,7 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // 
CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8396,9 +8396,9 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -8409,7 +8409,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8427,11 +8427,11 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK17-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK17-NEXT: ret void // // @@ -8446,11 +8446,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8471,14 +8471,14 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 
// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: ret void // // @@ -8513,7 +8513,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8555,9 +8555,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -8601,7 +8601,7 @@ int bar(int n){ // CHECK17-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -8620,7 +8620,7 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -8648,7 +8648,7 @@ int bar(int n){ // CHECK17-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -9057,7 +9057,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -9087,7 +9087,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -9122,15 +9122,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -9157,19 +9157,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], 
align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -9191,11 +9191,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9219,14 +9219,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -9623,10 +9623,10 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = 
load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9761,7 +9761,7 @@ int bar(int n){ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9779,9 +9779,9 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -9792,7 +9792,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9810,11 +9810,11 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK18-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK18-NEXT: ret void // // @@ -9829,11 +9829,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load 
i64, i64* [[AA_CASTED]], align 8 @@ -9854,14 +9854,14 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: ret void // // @@ -9896,7 +9896,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9938,9 +9938,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -9984,7 +9984,7 @@ int bar(int n){ // CHECK18-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -10003,7 +10003,7 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load 
i64, i64* [[NN_CASTED]], align 8 @@ -10031,7 +10031,7 @@ int bar(int n){ // CHECK18-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -10440,7 +10440,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -10470,7 +10470,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10505,15 +10505,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10540,19 +10540,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: 
[[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK18-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10574,11 +10574,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10602,14 +10602,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -11000,7 +11000,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11161,7 +11161,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11179,11 +11179,11 @@ int bar(int n){ // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK19-NEXT: ret void // // @@ -11200,7 +11200,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11223,11 +11223,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: ret void // // @@ -11855,11 +11855,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// 
CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11888,16 +11888,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK19-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -11921,7 +11921,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11947,11 +11947,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -12342,7 +12342,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // 
CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12503,7 +12503,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12521,11 +12521,11 @@ int bar(int n){ // CHECK20-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK20-NEXT: ret void // // @@ -12542,7 +12542,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12565,11 +12565,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: ret void // // @@ -13197,11 +13197,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 
@@ -13230,16 +13230,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK20-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -13263,7 +13263,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13289,11 +13289,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -13322,10 +13322,10 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 
2 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13353,7 +13353,7 @@ int bar(int n){ // CHECK25-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13371,11 +13371,11 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK25-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK25-NEXT: ret void // // @@ -13390,11 +13390,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13415,14 +13415,14 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: ret void // // @@ -13457,7 +13457,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 
// CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -13499,9 +13499,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -13545,7 +13545,7 @@ int bar(int n){ // CHECK25-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -13564,7 +13564,7 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -13592,7 +13592,7 @@ int bar(int n){ // CHECK25-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -13669,15 +13669,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, 
i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -13704,19 +13704,19 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK25-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK25-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -13743,7 +13743,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -13773,7 +13773,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -13804,11 +13804,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* 
[[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13832,14 +13832,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -13861,10 +13861,10 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13892,7 +13892,7 @@ int bar(int n){ // CHECK26-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK26-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13910,11 +13910,11 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK26-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK26-NEXT: ret void // // @@ -13929,11 +13929,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13954,14 +13954,14 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: ret void // // @@ -13996,7 +13996,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -14038,9 +14038,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -14084,7 +14084,7 @@ int bar(int n){ // CHECK26-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -14103,7 +14103,7 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -14131,7 +14131,7 @@ int bar(int n){ // CHECK26-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -14208,15 +14208,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -14243,19 +14243,19 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = 
bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK26-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK26-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -14282,7 +14282,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -14312,7 +14312,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -14343,11 +14343,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to 
i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -14371,14 +14371,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -14401,7 +14401,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14429,7 +14429,7 @@ int bar(int n){ // CHECK27-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14447,11 +14447,11 @@ int bar(int n){ // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK27-NEXT: ret void // // @@ -14468,7 +14468,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: 
[[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14491,11 +14491,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: ret void // // @@ -14733,11 +14733,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -14766,16 +14766,16 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK27-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK27-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -14862,7 +14862,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load 
i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14888,11 +14888,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -14915,7 +14915,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14943,7 +14943,7 @@ int bar(int n){ // CHECK28-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14961,11 +14961,11 @@ int bar(int n){ // CHECK28-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK28-NEXT: ret void // // @@ -14982,7 +14982,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15005,11 +15005,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: ret void // // @@ -15247,11 +15247,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -15280,16 +15280,16 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK28-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK28-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -15376,7 +15376,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15402,11 +15402,11 @@ int 
bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index 63e5e4dc10ef..300caa04c22d 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -648,10 +648,10 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -837,7 +837,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -893,9 +893,9 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -918,7 +918,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], 
align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -974,11 +974,11 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1005,11 +1005,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1068,14 +1068,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1126,11 +1126,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* 
[[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1186,7 +1186,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -1221,9 +1221,9 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -1637,7 +1637,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1705,7 +1705,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1756,19 +1756,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* 
[[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1809,9 +1809,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1864,19 +1864,19 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ 
-1914,11 +1914,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1980,14 +1980,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2368,10 +2368,10 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2557,7 +2557,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* 
[[A_CASTED]], align 8 @@ -2613,9 +2613,9 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2638,7 +2638,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2694,11 +2694,11 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2725,11 +2725,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2788,14 +2788,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: 
[[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2846,11 +2846,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2906,7 +2906,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -2941,9 +2941,9 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -3357,7 +3357,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3425,7 +3425,7 @@ int bar(int n){ // CHECK2-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3476,19 +3476,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3529,9 +3529,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3584,19 +3584,19 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: 
[[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK2-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK2-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -3634,11 +3634,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3700,14 +3700,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -4083,7 +4083,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store 
i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4345,7 +4345,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4401,11 +4401,11 @@ int bar(int n){ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4434,7 +4434,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4495,11 +4495,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5171,11 +5171,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP7]], i8* [[CONV3]], 
align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5272,16 +5272,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -5321,7 +5321,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5385,11 +5385,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5765,7 +5765,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6027,7 +6027,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = 
alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6083,11 +6083,11 @@ int bar(int n){ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6116,7 +6116,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6177,11 +6177,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6853,11 +6853,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -6954,16 +6954,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], 
align 4 // CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -7003,7 +7003,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7067,11 +7067,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7112,10 +7112,10 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV5]], 
align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7193,7 +7193,7 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7249,11 +7249,11 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7280,11 +7280,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7343,14 +7343,14 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7401,11 +7401,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK9-NEXT: [[CONV5:%.*]] = 
bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -7461,7 +7461,7 @@ int bar(int n){ // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -7496,9 +7496,9 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -7579,19 +7579,19 @@ int bar(int n){ // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* 
[[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -7632,9 +7632,9 @@ int bar(int n){ // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -7687,19 +7687,19 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK9-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK9-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK9-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -7742,7 +7742,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -7810,7 +7810,7 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7853,11 +7853,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7919,14 +7919,14 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7960,10 +7960,10 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8041,7 +8041,7 @@ int bar(int n){ // CHECK10-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[AA]], i64* 
[[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8097,11 +8097,11 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8128,11 +8128,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8191,14 +8191,14 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK10-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8249,11 +8249,11 @@ int bar(int n){ // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8309,7 +8309,7 @@ int bar(int n){ // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -8344,9 +8344,9 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -8427,19 +8427,19 @@ int bar(int n){ // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK10-NEXT: 
[[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK10-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8480,9 +8480,9 @@ int bar(int n){ // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -8535,19 +8535,19 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK10-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK10-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK10-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK10-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK10-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK10-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK10-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -8590,7 +8590,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -8658,7 +8658,7 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 
+// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK10-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8701,11 +8701,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8767,14 +8767,14 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK10-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -8809,7 +8809,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8887,7 +8887,7 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // 
CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8943,11 +8943,11 @@ int bar(int n){ // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8976,7 +8976,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9037,11 +9037,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9268,11 +9268,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9369,16 +9369,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK11-NEXT: [[ADD13:%.*]] = add 
nsw i32 [[CONV12]], 1 // CHECK11-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK11-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK11-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -9531,7 +9531,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9595,11 +9595,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -9634,7 +9634,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9712,7 +9712,7 @@ int bar(int n){ // CHECK12-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ 
-9768,11 +9768,11 @@ int bar(int n){ // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK12-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9801,7 +9801,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9862,11 +9862,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK12-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK12-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10093,11 +10093,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -10194,16 +10194,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK12-NEXT: [[CONV14:%.*]] = trunc i32 
[[ADD13]] to i16 -// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK12-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK12-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK12-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -10356,7 +10356,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10420,11 +10420,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK12-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK12-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -10798,10 +10798,10 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10987,7 +10987,7 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// 
CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -11043,9 +11043,9 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11068,7 +11068,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -11124,11 +11124,11 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11155,11 +11155,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -11218,14 +11218,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 
[[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11276,11 +11276,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11336,7 +11336,7 @@ int bar(int n){ // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -11371,9 +11371,9 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -11787,7 +11787,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -11855,7 +11855,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -11906,19 +11906,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -11959,9 +11959,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -12014,19 +12014,19 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 
+// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -12064,11 +12064,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12130,14 +12130,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12518,10 +12518,10 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* 
[[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12707,7 +12707,7 @@ int bar(int n){ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -12763,9 +12763,9 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12788,7 +12788,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12844,11 +12844,11 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12875,11 +12875,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: 
[[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12938,14 +12938,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12996,11 +12996,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -13056,7 +13056,7 @@ int bar(int n){ // CHECK18-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 
[[TMP8]]) @@ -13091,9 +13091,9 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -13507,7 +13507,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -13575,7 +13575,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -13626,19 +13626,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 
[[TMP7]], i8* [[CONV7]], align 1 // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -13679,9 +13679,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -13734,19 +13734,19 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK18-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK18-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK18-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK18-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -13784,11 +13784,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13850,14 +13850,14 @@ int bar(int n){ // CHECK18-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -14233,7 +14233,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14495,7 +14495,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14551,11 +14551,11 @@ int bar(int n){ // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -14584,7 +14584,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // 
CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14645,11 +14645,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -15321,11 +15321,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -15422,16 +15422,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK19-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -15471,7 +15471,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: 
[[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15535,11 +15535,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -15915,7 +15915,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16177,7 +16177,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16233,11 +16233,11 @@ int bar(int n){ // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16266,7 +16266,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], 
align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16327,11 +16327,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17003,11 +17003,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -17104,16 +17104,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK20-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK20-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK20-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -17153,7 +17153,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // 
CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -17217,11 +17217,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -17262,10 +17262,10 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17343,7 +17343,7 @@ int bar(int n){ // CHECK25-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17399,11 +17399,11 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17430,11 +17430,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17493,14 +17493,14 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK25-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17551,11 +17551,11 @@ int bar(int n){ // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -17611,7 +17611,7 @@ int bar(int n){ // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -17646,9 +17646,9 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP18]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK25-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK25-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -17729,19 +17729,19 @@ int bar(int n){ // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK25-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK25-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17782,9 +17782,9 @@ int bar(int n){ // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -17837,19 +17837,19 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK25-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK25-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, 
i32* [[CONV1]], align 4 // CHECK25-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK25-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK25-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK25-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK25-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK25-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK25-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK25-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK25-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK25-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -17892,7 +17892,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17960,7 +17960,7 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK25-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -18003,11 +18003,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18069,14 +18069,14 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK25-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18110,10 +18110,10 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18191,7 +18191,7 @@ int bar(int n){ // CHECK26-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18247,11 +18247,11 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18278,11 +18278,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast 
i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18341,14 +18341,14 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK26-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18399,11 +18399,11 @@ int bar(int n){ // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK26-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18459,7 +18459,7 @@ int bar(int n){ // CHECK26-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK26-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK26-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK26-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -18494,9 +18494,9 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP18]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK26-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK26-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -18577,19 +18577,19 @@ int bar(int n){ // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK26-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK26-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK26-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -18630,9 +18630,9 @@ int bar(int n){ // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -18685,19 +18685,19 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK26-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK26-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK26-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP20:%.*]] = load i32, 
i32* [[CONV1]], align 4 // CHECK26-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK26-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK26-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK26-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK26-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK26-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK26-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK26-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK26-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK26-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK26-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK26-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -18740,7 +18740,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -18808,7 +18808,7 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK26-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -18851,11 +18851,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18917,14 +18917,14 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK26-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18959,7 +18959,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19037,7 +19037,7 @@ int bar(int n){ // CHECK27-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19093,11 +19093,11 @@ int bar(int n){ // CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK27-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19126,7 +19126,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load 
i32, i32* [[AA_CASTED]], align 4 @@ -19187,11 +19187,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK27-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19418,11 +19418,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -19519,16 +19519,16 @@ int bar(int n){ // CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK27-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK27-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK27-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK27-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK27-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK27-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK27-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -19681,7 +19681,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* 
[[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19745,11 +19745,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK27-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -19784,7 +19784,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19862,7 +19862,7 @@ int bar(int n){ // CHECK28-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19918,11 +19918,11 @@ int bar(int n){ // CHECK28-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK28-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK28-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19951,7 +19951,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20012,11 
+20012,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK28-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK28-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20243,11 +20243,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -20344,16 +20344,16 @@ int bar(int n){ // CHECK28-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK28-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK28-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK28-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK28-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK28-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK28-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK28-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK28-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -20506,7 +20506,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = 
load i32, i32* [[AA_CASTED]], align 4 @@ -20570,11 +20570,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK28-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK28-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp index 10a62a2c6f17..e5b6eac75b0d 100644 --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -820,11 +820,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -868,9 +868,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1264,11 +1264,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 
-// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1312,9 +1312,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -2415,4 +2415,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp index 8a278c56a29e..6022cf67baf2 100644 --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -1755,7 +1755,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1789,7 +1789,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -1871,7 +1871,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1905,7 +1905,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = 
load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -1991,11 +1991,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2032,7 +2032,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2049,7 +2049,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -2700,7 +2700,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2734,7 +2734,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, 
i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2816,7 +2816,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2850,7 +2850,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2936,11 +2936,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2977,7 +2977,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2994,7 +2994,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp index 778dd0a0c3b0..3af5b56cf7ed 100644 --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -345,11 +345,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -446,7 +446,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -458,9 +458,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -694,7 +694,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -794,7 +794,7 @@ int main() { // 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1135,11 +1135,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1236,7 +1236,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1248,9 +1248,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1484,7 +1484,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1584,7 +1584,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -3407,7 +3407,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3416,7 +3416,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3481,10 +3481,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -3664,7 +3664,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3673,7 +3673,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3738,10 +3738,10 @@ int main() { // CHECK10-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -3780,4 +3780,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp index 20008403c7c1..51e4b21eb6c6 100644 --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -182,11 +182,11 @@ int main() { // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -298,9 +298,9 @@ int main() { // CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[TMP19]], align 8 // CHECK1-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP22]], float* [[CONV3]], align 8 +// CHECK1-NEXT: store float [[TMP22]], float* [[CONV3]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -361,11 +361,11 @@ int main() { // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load 
float, float* [[CONV3]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -477,9 +477,9 @@ int main() { // CHECK2-NEXT: [[TMP20:%.*]] = load double, double* [[TMP19]], align 8 // CHECK2-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP22]], float* [[CONV3]], align 8 +// CHECK2-NEXT: store float [[TMP22]], float* [[CONV3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -977,12 +977,12 @@ int main() { // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1104,7 +1104,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -1129,7 +1129,7 @@ int main() { // CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -1320,7 +1320,7 @@ int main() { // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1439,7 +1439,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -1683,12 +1683,12 @@ int main() { // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1810,7 +1810,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -1835,7 +1835,7 @@ int main() { // CHECK10-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK10-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 // CHECK10-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK10: .omp.lastprivate.done: // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -2026,7 +2026,7 @@ int main() { // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = 
bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2145,7 +2145,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -3615,4 +3615,3 @@ int main() { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index 1b0bd4615118..2148b693e151 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -136,10 +136,10 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -171,7 +171,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -217,7 +217,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -267,7 +267,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store [1000 x 
i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -369,7 +369,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -404,7 +404,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -450,7 +450,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -503,7 +503,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -754,10 +754,10 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// 
CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -789,7 +789,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -835,7 +835,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -885,7 +885,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -987,7 +987,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1022,7 +1022,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1068,7 +1068,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] 
= load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1121,7 +1121,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1379,10 +1379,10 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK3-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1414,7 +1414,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1460,7 +1460,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1510,7 +1510,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 
4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1612,7 +1612,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1647,7 +1647,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1693,7 +1693,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1746,7 +1746,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3046,10 +3046,10 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3081,7 +3081,7 @@ int target_teams_fun(int *g){ // 
CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3127,7 +3127,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3177,7 +3177,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3279,7 +3279,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3314,7 +3314,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3360,7 +3360,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3413,7 +3413,7 @@ 
int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3508,10 +3508,10 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK11-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3543,7 +3543,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3589,7 +3589,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3639,7 +3639,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3741,7 +3741,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK11-NEXT: 
[[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3776,7 +3776,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3822,7 +3822,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3875,7 +3875,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp index 7ed17efc5ec4..281654218d09 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -1117,11 +1117,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1167,9 +1167,9 @@ int 
main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1224,11 +1224,11 @@ int main (int argc, char **argv) { // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1291,9 +1291,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1766,11 +1766,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1816,9 +1816,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = 
load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1873,11 +1873,11 @@ int main (int argc, char **argv) { // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1940,9 +1940,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -3524,4 +3524,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp index c5517aacd570..23e40d279777 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -2633,7 +2633,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2668,7 +2668,7 @@ int main 
(int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2714,7 +2714,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2767,7 +2767,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2855,7 +2855,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2890,7 +2890,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2936,7 +2936,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2989,7 +2989,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = 
bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3081,11 +3081,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3124,7 +3124,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3141,7 +3141,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -3172,11 +3172,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV9:%.*]] = 
bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3255,7 +3255,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3733,7 +3733,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3766,7 +3766,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -3793,7 +3793,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4173,7 +4173,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4208,7 +4208,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, 
i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4254,7 +4254,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4307,7 +4307,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4395,7 +4395,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4430,7 +4430,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4476,7 +4476,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4529,7 +4529,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* 
[[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4621,11 +4621,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4664,7 +4664,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4681,7 +4681,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -4712,11 +4712,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* 
[[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4795,7 +4795,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5273,7 +5273,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -5306,7 +5306,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -5333,7 +5333,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8423,4 +8423,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp index 3bac42768739..643cdf2cd375 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -406,11 +406,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load 
%struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -509,11 +509,11 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -677,7 +677,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -689,9 +689,9 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -892,7 +892,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: 
[[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -993,7 +993,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1140,7 +1140,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1468,11 +1468,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1571,11 +1571,11 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1739,7 +1739,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], 
[2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1751,9 +1751,9 @@ int main() { // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1954,7 +1954,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2055,7 +2055,7 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2202,7 +2202,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -4528,7 +4528,7 @@ int main() { // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4537,7 +4537,7 @@ int main() { // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 
[[TMP5]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4604,7 +4604,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4613,7 +4613,7 @@ int main() { // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4699,10 +4699,10 @@ int main() { // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -4882,7 +4882,7 @@ int main() { // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK6-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4891,7 +4891,7 @@ int main() { // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4958,7 +4958,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: 
[[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4967,7 +4967,7 @@ int main() { // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5053,10 +5053,10 @@ int main() { // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5116,11 +5116,11 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5219,11 +5219,11 @@ int main() { // CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5387,7 +5387,7 @@ int main() { // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // 
CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -5399,9 +5399,9 @@ int main() { // CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5458,7 +5458,7 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -5559,7 +5559,7 @@ int main() { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -5706,7 +5706,7 @@ int main() { // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -5855,11 +5855,11 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 
// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5958,11 +5958,11 @@ int main() { // CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6126,7 +6126,7 @@ int main() { // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -6138,9 +6138,9 @@ int main() { // CHECK14-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6197,7 +6197,7 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK14-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -6298,7 +6298,7 @@ int main() { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -6445,7 +6445,7 @@ int main() { // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -8015,7 +8015,7 @@ int main() { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -8024,7 +8024,7 @@ int main() { // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -8091,7 +8091,7 @@ int main() { // CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -8100,7 +8100,7 @@ int main() { // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -8186,10 +8186,10 @@ int main() { // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK17-NEXT: store volatile 
i32 1, i32* [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 // CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 @@ -8212,4 +8212,3 @@ int main() { // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK17-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 5c9b5be7444d..e17961e76ae3 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -763,7 +763,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -822,7 +822,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1264,7 +1264,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -1323,7 +1323,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -2087,7 +2087,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load 
i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2146,7 +2146,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -2588,7 +2588,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2647,7 +2647,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -3411,7 +3411,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -3470,7 +3470,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -3912,7 +3912,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] 
to i8 @@ -3971,7 +3971,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -4735,7 +4735,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -4794,7 +4794,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -5236,7 +5236,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -5295,7 +5295,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -6059,7 +6059,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -6118,7 +6118,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 
[[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -6560,7 +6560,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -6619,7 +6619,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -7383,7 +7383,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -7442,7 +7442,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -7884,7 +7884,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -7943,7 +7943,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 
[[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -8707,7 +8707,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -8766,7 +8766,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -9208,7 +9208,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9267,7 +9267,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -10031,7 +10031,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10090,7 +10090,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: @@ -10532,7 +10532,7 @@ int main() { 
// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10591,7 +10591,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp index 7a368c75c244..38d9f4673b0b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -213,11 +213,11 @@ int main() { // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -338,9 +338,9 @@ int main() { // CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[TMP26]], align 8 // CHECK1-NEXT: store volatile double [[TMP27]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP29]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP29]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -459,9 +459,9 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP2]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: store float 
[[TMP24]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP24]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -518,11 +518,11 @@ int main() { // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -643,9 +643,9 @@ int main() { // CHECK2-NEXT: [[TMP27:%.*]] = load double, double* [[TMP26]], align 8 // CHECK2-NEXT: store volatile double [[TMP27]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP29]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP29]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -764,9 +764,9 @@ int main() { // CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK2-NEXT: store volatile double [[TMP22]], double* [[TMP2]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK2-NEXT: store float [[TMP24]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP24]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -1484,12 +1484,12 @@ int main() { // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1610,7 +1610,7 @@ int main() { // 
CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -1635,7 +1635,7 @@ int main() { // CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -1776,7 +1776,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1801,7 +1801,7 @@ int main() { // CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK5-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -1992,7 +1992,7 @@ int main() { // CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2105,7 +2105,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK5-NEXT: 
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) @@ -2265,7 +2265,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -2509,12 +2509,12 @@ int main() { // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK6-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK6-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -2635,7 +2635,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -2660,7 +2660,7 @@ int main() { // CHECK6-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK6-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK6-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -2801,7 +2801,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 
false) @@ -2826,7 +2826,7 @@ int main() { // CHECK6-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK6-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK6-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -3017,7 +3017,7 @@ int main() { // CHECK6-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK6-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -3130,7 +3130,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK6-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) @@ -3290,7 +3290,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -5370,4 +5370,3 @@ int main() { // CHECK8-NEXT: call void @__tgt_register_requires(i64 1) // CHECK8-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index 364dac905a3d..86235945dd3b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -7797,7 +7797,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = 
load i64, i64* [[N_CASTED]], align 8 @@ -7832,7 +7832,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -7878,7 +7878,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -7931,7 +7931,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8019,7 +8019,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8054,7 +8054,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8100,7 +8100,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, 
i64* [[N_CASTED]], align 8 @@ -8153,7 +8153,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8245,11 +8245,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8288,7 +8288,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8305,7 +8305,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -8336,11 +8336,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// 
CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8419,7 +8419,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8507,7 +8507,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8542,7 +8542,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8588,7 +8588,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8641,7 +8641,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8729,11 +8729,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: 
[[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8772,7 +8772,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8818,11 +8818,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8878,7 +8878,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8901,7 +8901,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9413,7 +9413,7 
@@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9473,7 +9473,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9526,7 +9526,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -9752,7 +9752,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9812,7 +9812,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9865,7 +9865,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10316,7 +10316,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10351,7 +10351,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10397,7 +10397,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10450,7 +10450,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10538,7 +10538,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10573,7 +10573,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 
8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10619,7 +10619,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10672,7 +10672,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10764,11 +10764,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10807,7 +10807,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10824,7 +10824,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -10855,11 +10855,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10938,7 +10938,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11026,7 +11026,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -11061,7 +11061,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11107,7 +11107,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store 
i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -11160,7 +11160,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11248,11 +11248,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11291,7 +11291,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11337,11 +11337,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11397,7 +11397,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11420,7 +11420,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11932,7 +11932,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11992,7 +11992,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12045,7 +12045,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -12271,7 +12271,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] 
= load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12331,7 +12331,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12384,7 +12384,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17669,7 +17669,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -17704,7 +17704,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -17750,7 +17750,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -17803,7 +17803,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -17891,7 +17891,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -17926,7 +17926,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -17972,7 +17972,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18025,7 +18025,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18117,11 +18117,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18160,7 +18160,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 
8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18177,7 +18177,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -18208,11 +18208,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18291,7 +18291,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18379,7 +18379,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18414,7 +18414,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: 
[[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18460,7 +18460,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18513,7 +18513,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18601,11 +18601,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18644,7 +18644,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18690,11 +18690,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// 
CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18750,7 +18750,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18773,7 +18773,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -19285,7 +19285,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19345,7 +19345,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19398,7 +19398,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load 
i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -19624,7 +19624,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19684,7 +19684,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19737,7 +19737,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20188,7 +20188,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20223,7 +20223,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20269,7 +20269,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20322,7 +20322,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20410,7 +20410,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20445,7 +20445,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20491,7 +20491,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20544,7 +20544,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20636,11 +20636,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: 
[[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20679,7 +20679,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20696,7 +20696,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -20727,11 +20727,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK18-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20810,7 +20810,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], 
align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20898,7 +20898,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20933,7 +20933,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20979,7 +20979,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21032,7 +21032,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21120,11 +21120,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 
@@ -21163,7 +21163,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21209,11 +21209,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21269,7 +21269,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21292,7 +21292,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21804,7 +21804,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21864,7 +21864,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = 
zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21917,7 +21917,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -22143,7 +22143,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -22203,7 +22203,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -22256,7 +22256,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index 7b289aa7a04a..bad61fd7a674 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -261,14 +261,14 @@ int target_teams_fun(int 
*g){ // CHECK1-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -304,7 +304,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -356,7 +356,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -367,7 +367,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -383,7 +383,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -424,7 +424,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: 
[[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -438,9 +438,9 @@ int target_teams_fun(int *g){ // CHECK1: omp.precond.then: // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -496,7 +496,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -512,7 +512,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -538,7 +538,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -573,7 +573,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -610,28 +610,28 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -684,7 +684,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -727,30 +727,30 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !15 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // 
CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -972,14 +972,14 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1015,7 +1015,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1067,7 +1067,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, 
i64* [[N_CASTED]], align 8 @@ -1078,7 +1078,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1094,7 +1094,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1135,7 +1135,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1149,9 +1149,9 @@ int target_teams_fun(int *g){ // CHECK2: omp.precond.then: // CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK2-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -1207,7 +1207,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1223,7 +1223,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK2-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -1249,7 +1249,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = 
bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1284,7 +1284,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1321,28 +1321,28 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1395,7 +1395,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1438,30 +1438,30 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 
// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1772,7 +1772,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1896,7 +1896,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2007,25 +2007,25 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2118,29 +2118,29 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, 
i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2451,7 +2451,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2575,7 +2575,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2686,25 +2686,25 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !12 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2797,29 +2797,29 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* 
[[TMP17]], i32 0 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3475,14 +3475,14 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3518,7 +3518,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3570,7 +3570,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load 
i64, i64* [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3581,7 +3581,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3597,7 +3597,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -3638,7 +3638,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3652,9 +3652,9 @@ int target_teams_fun(int *g){ // CHECK9: omp.precond.then: // CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -3710,7 +3710,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3726,7 +3726,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store 
i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -3752,7 +3752,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3787,7 +3787,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3824,28 +3824,28 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !12 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3898,7 +3898,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3941,30 +3941,30 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// 
CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !16 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !16 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4009,14 +4009,14 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4052,7 +4052,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4104,7 +4104,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4115,7 +4115,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4131,7 +4131,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -4172,7 +4172,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4186,9 +4186,9 @@ int target_teams_fun(int *g){ // CHECK10: omp.precond.then: // CHECK10-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK10-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -4244,7 +4244,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK10: 
omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4260,7 +4260,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK10-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -4286,7 +4286,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4321,7 +4321,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4358,28 +4358,28 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !12 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !12 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4432,7 +4432,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4475,30 +4475,30 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = 
load i32*, i32** [[G_ADDR]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !16 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !16 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4637,7 +4637,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4761,7 +4761,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4872,25 +4872,25 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4983,29 +4983,29 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !17 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !17 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -5144,7 +5144,7 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5268,7 +5268,7 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5379,25 +5379,25 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5490,29 +5490,29 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: 
omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !17 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp index 79b93d237433..bdd4d313ede1 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -195,23 +195,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -278,40 +278,40 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 +// CHECK1-NEXT: store i32 
[[ADD7]], i32* [[J]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -430,23 +430,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -513,40 +513,40 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -665,21 +665,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -744,38 +744,38 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -894,21 +894,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -973,38 +973,38 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK4-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1461,11 +1461,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1511,9 +1511,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1561,29 +1561,29 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -1653,9 +1653,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1707,13 +1707,13 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 // CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -1722,16 +1722,16 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// 
CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -1741,23 +1741,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 // CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -1870,23 +1870,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -1953,39 +1953,39 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2162,11 +2162,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -2212,9 +2212,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], 
align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -2262,29 +2262,29 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK10-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4, !llvm.access.group !5 // CHECK10-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !5 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK10-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2354,9 +2354,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -2408,13 +2408,13 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 // CHECK10-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK10-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK10-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -2423,16 +2423,16 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK10-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK10-NEXT: store i32 
[[CONV23]], i32* [[I13]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK10-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK10-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK10-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK10-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK10-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK10-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK10-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -2442,23 +2442,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK10-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK10-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK10-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK10-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP28]] -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 // CHECK10-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4, !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK10-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 // CHECK10-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK10-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK10-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2571,23 +2571,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !14 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2654,39 +2654,39 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK10-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[DIV]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK10-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK10-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2954,29 +2954,29 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 // 
CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] -// CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3100,13 +3100,13 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 // CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // 
CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -3115,16 +3115,16 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -3134,21 +3134,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: 
[[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 // CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3261,21 +3261,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3340,37 +3340,37 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, 
!llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3638,29 +3638,29 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK12-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !6 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK12-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] -// CHECK12-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK12-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -3784,13 +3784,13 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 // CHECK12-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK12-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK12-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK12-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -3799,16 +3799,16 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK12-NEXT: 
[[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK12-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK12-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK12-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK12-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK12-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK12-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK12-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK12-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK12-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -3818,21 +3818,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK12-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK12-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK12-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK12-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP28]] -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4, !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK12-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 // CHECK12-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK12-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK12-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -3945,21 +3945,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// 
CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -4024,37 +4024,37 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 
[[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK12-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK12-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index 60737285e51b..1fe8bf844358 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -272,23 +272,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -352,28 +352,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, 
i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -437,23 +437,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -517,28 +517,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // 
CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -602,43 +602,43 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store 
i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -702,28 +702,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -883,23 +883,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -963,28 +963,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: 
omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1048,23 +1048,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1128,28 +1128,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: 
omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1213,43 +1213,43 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ 
-1313,28 +1313,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1494,21 +1494,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1570,27 +1570,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1654,21 +1654,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1730,27 +1730,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1814,41 +1814,41 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
!22 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1910,27 +1910,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2090,21 +2090,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2166,27 +2166,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: 
omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2250,21 +2250,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2326,27 +2326,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2410,41 +2410,41 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
!22 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK4: cond.true5: // CHECK4-NEXT: br label [[COND_END7:%.*]] // CHECK4: cond.false6: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: br label [[COND_END7]] // CHECK4: cond.end7: // CHECK4-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2506,27 +2506,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3295,7 +3295,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3330,7 +3330,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3367,27 +3367,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3441,7 +3441,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3484,27 +3484,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3541,7 +3541,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3576,7 +3576,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3613,27 +3613,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3687,7 +3687,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3730,27 +3730,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, 
!llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3791,11 +3791,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3834,7 +3834,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3851,7 +3851,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -3872,55 +3872,55 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK9: cond.true14: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END16:%.*]] // CHECK9: cond.false15: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END16]] // CHECK9: cond.end16: // CHECK9-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK9-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK9-NEXT: store i32 
[[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3977,7 +3977,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4020,27 +4020,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4206,23 +4206,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], 
align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4286,27 +4286,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4370,23 +4370,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4450,27 +4450,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ 
-4495,7 +4495,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4528,7 +4528,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -4547,47 +4547,47 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK9: cond.true6: // CHECK9-NEXT: br label [[COND_END8:%.*]] // CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[COND_END8]] // CHECK9: cond.end8: // CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -4654,27 +4654,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4949,7 +4949,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4984,7 +4984,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5021,27 +5021,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5095,7 +5095,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5138,27 +5138,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: 
omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5195,7 +5195,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -5230,7 +5230,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5267,27 +5267,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5341,7 +5341,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5384,27 +5384,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5445,11 +5445,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -5488,7 +5488,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5505,7 +5505,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -5526,55 +5526,55 @@ int main 
(int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK10-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK10-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK10: cond.true14: -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END16:%.*]] // CHECK10: cond.false15: -// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END16]] // CHECK10: cond.end16: // CHECK10-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK10-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5631,7 +5631,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5674,27 +5674,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5860,23 
+5860,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5940,27 +5940,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6024,23 +6024,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6104,27 +6104,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6149,7 +6149,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -6182,7 +6182,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -6201,47 +6201,47 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK10: cond.true6: // CHECK10-NEXT: br label [[COND_END8:%.*]] // CHECK10: cond.false7: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[COND_END8]] // CHECK10: cond.end8: // CHECK10-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6308,27 +6308,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6670,24 +6670,24 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6781,26 +6781,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// 
CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6906,24 +6906,24 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7017,26 +7017,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: 
@@ -7152,51 +7152,51 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: // CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7292,26 +7292,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7476,21 +7476,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7552,26 +7552,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7635,21 +7635,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7711,26 +7711,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7804,44 
+7804,44 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: // CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7905,26 +7905,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -8266,24 +8266,24 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// 
CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8377,26 +8377,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// 
CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8502,24 +8502,24 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8613,26 +8613,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: 
@@ -8748,51 +8748,51 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK12-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK12-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK12: cond.true11: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13:%.*]] // CHECK12: cond.false12: -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13]] // CHECK12: cond.end13: // CHECK12-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8888,26 +8888,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9072,21 +9072,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9148,26 +9148,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK12-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9231,21 +9231,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9307,26 +9307,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9400,44 
+9400,44 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK12: cond.true5: // CHECK12-NEXT: br label [[COND_END7:%.*]] // CHECK12: cond.false6: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[COND_END7]] // CHECK12: cond.end7: // CHECK12-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9501,26 +9501,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 501b5b289eb2..e900eb9a3323 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -404,11 +404,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -496,33 +496,33 @@ int main() { // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -671,40 +671,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK1-NEXT: [[TMP21:%.*]] = 
bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -904,7 +904,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -994,30 +994,30 @@ int main() { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] 
= load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1148,37 +1148,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1494,11 +1494,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1586,33 +1586,33 @@ int main() { // CHECK2-NEXT: store i32 [[TMP11]], i32* 
[[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1761,40 +1761,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK2-NEXT: [[TMP21:%.*]] 
= bitcast %struct.S* [[VAR7]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1994,7 +1994,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2084,30 +2084,30 @@ int main() { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: 
[[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2238,37 +2238,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2668,29 +2668,29 @@ int main() { // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* 
[[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2835,38 +2835,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3152,27 +3152,27 @@ int main() { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3300,35 +3300,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: 
[[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3728,29 +3728,29 @@ int main() { // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3895,38 +3895,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4212,27 +4212,27 @@ int main() { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// 
CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4360,35 +4360,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK4-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK4-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4638,7 +4638,7 @@ int main() { // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4647,7 +4647,7 @@ int main() { // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4705,36 +4705,36 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4807,35 +4807,35 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK5-NEXT: 
store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -5006,7 +5006,7 @@ int main() { // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK6-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5015,7 +5015,7 @@ int main() { // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5073,36 +5073,36 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 
[[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5175,35 +5175,35 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK6-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK6-NEXT: 
[[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK6-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK6-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK6-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -6818,11 +6818,11 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6910,33 +6910,33 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: 
[[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7085,40 +7085,40 @@ int main() { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP9]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK13-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7174,7 +7174,7 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** 
[[VAR_ADDR]], align 8 // CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -7264,30 +7264,30 @@ int main() { // CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7418,37 +7418,37 @@ int main() { // CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM11:%.*]] = sext i32 
[[TMP21]] to i64 // CHECK13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK13-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK13-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7585,11 +7585,11 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -7677,33 +7677,33 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -7852,40 +7852,40 @@ int main() { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP9:%.*]] = icmp sle i32 
[[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK14-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK14-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -7941,7 +7941,7 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // 
CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK14-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -8031,30 +8031,30 @@ int main() { // CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !15 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !15 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -8185,37 +8185,37 @@ int main() { // CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM11:%.*]] = sext i32 
[[TMP21]] to i64 // CHECK14-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK14-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK14-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !18 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -8438,29 +8438,29 @@ int main() { // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -8605,38 +8605,38 @@ int main() { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] 
= add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK15-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK15-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !11 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -8779,27 +8779,27 @@ int main() { // CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // 
CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -8927,35 +8927,35 @@ int main() { // CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK15-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -9178,29 +9178,29 @@ int main() { // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !7 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9345,38 +9345,38 @@ int main() { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK16-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK16-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK16-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !11 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9519,27 +9519,27 @@ int main() { // CHECK16-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !16 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9667,35 +9667,35 @@ int main() { // CHECK16-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK16-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK16-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK16-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9829,7 +9829,7 @@ int main() { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -9838,7 +9838,7 @@ int main() { // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -9896,36 +9896,36 @@ int main() { // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -9998,35 +9998,35 @@ int main() { // CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !9 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, 
!llvm.access.group !9 // CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] +// CHECK17-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index f21b6fa5ae21..a2844f97aa93 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -157,7 +157,7 @@ int main() { // CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -205,26 +205,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 
8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -289,23 +289,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -365,26 +365,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -446,23 +446,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -587,22 +587,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -664,23 +664,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -740,26 +740,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -821,23 +821,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !33 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -860,7 +860,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -910,35 +910,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !36 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1000,23 +1000,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1130,22 +1130,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1207,23 +1207,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1283,26 +1283,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1364,23 +1364,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !51 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1403,7 +1403,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -1453,35 +1453,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1543,23 +1543,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1631,7 +1631,7 @@ int main() { // CHECK2-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -1679,26 +1679,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1763,23 +1763,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1839,26 +1839,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1920,23 +1920,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2061,22 +2061,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2138,23 +2138,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2214,26 +2214,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2295,23 +2295,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !33 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2334,7 +2334,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2384,35 +2384,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !36 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2474,23 +2474,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2604,22 +2604,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2681,23 +2681,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2757,26 +2757,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2838,23 +2838,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !51 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2877,7 +2877,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2927,35 +2927,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3017,23 +3017,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3105,7 +3105,7 @@ int main() { // CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -3153,26 +3153,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !9 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3237,23 +3237,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3313,26 +3313,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp 
sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3394,23 +3394,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void 
@_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3535,22 +3535,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3612,23 +3612,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3808,7 +3808,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -3859,47 +3859,47 @@ int main() { // 
CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] @@ -3915,13 +3915,13 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: @@ -3989,7 +3989,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, 
i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4011,23 +4011,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4117,7 +4117,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4139,23 +4139,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4312,22 +4312,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4389,23 +4389,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4585,7 +4585,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -4635,35 +4635,35 @@ int main() { // 
CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4725,23 +4725,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4813,7 +4813,7 @@ int main() { // CHECK4-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -4861,26 +4861,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !9 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4945,23 +4945,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5021,26 +5021,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp 
sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5102,23 +5102,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void 
@_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5243,22 +5243,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5320,23 +5320,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5516,7 +5516,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -5567,47 +5567,47 @@ int main() { // 
CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] @@ -5623,13 +5623,13 @@ int main() { // CHECK4-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK4-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK4-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK4: omp_if.then15: @@ -5697,7 +5697,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, 
i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5719,23 +5719,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5825,7 +5825,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5847,23 +5847,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // 
CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -6020,22 +6020,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6097,23 +6097,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6293,7 +6293,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -6343,35 +6343,35 @@ int main() { // 
CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6433,23 +6433,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6491,23 +6491,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6517,23 +6517,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 
4 -// CHECK5-NEXT: call void @_Z9gtid_testv() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6567,23 +6567,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn4v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6593,23 +6593,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn5v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6623,23 +6623,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn6v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6675,23 +6675,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn1v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6701,23 +6701,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn2v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6731,23 +6731,23 @@ int main() { // CHECK5-NEXT: store i32 
[[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn3v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6773,23 +6773,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 
[[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6799,23 +6799,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z9gtid_testv() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6849,23 +6849,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn4v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6875,23 +6875,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn5v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6905,23 +6905,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] 
-// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn6v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6957,23 +6957,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn1v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6983,23 +6983,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] 
// CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn2v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7013,23 +7013,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn3v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7055,23 +7055,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7081,23 +7081,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK7-NEXT: call void @_Z9gtid_testv() +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end15: 
// CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7131,23 +7131,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn4v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7191,23 +7191,23 @@ int main() { // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn6v() +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw 
i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7268,23 +7268,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn1v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7324,23 +7324,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn3v() +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !23 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // 
CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7366,23 +7366,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7392,23 +7392,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 
// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK8-NEXT: call void @_Z9gtid_testv() +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7442,23 +7442,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn4v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !10 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7502,23 +7502,23 @@ int main() { // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label 
[[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn6v() +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7579,23 +7579,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn1v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !19 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7635,23 +7635,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// 
CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn3v() +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !23 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7707,7 +7707,7 @@ int main() { // CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -7755,26 +7755,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CONV2:%.*]] = 
bitcast i64* [[ARG_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7839,23 +7839,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7915,26 +7915,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7996,23 +7996,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8137,22 +8137,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8214,23 +8214,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8290,26 +8290,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8371,23 +8371,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !37 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8410,7 +8410,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -8460,35 +8460,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !40 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8550,23 +8550,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8680,22 +8680,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8757,23 +8757,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8833,26 +8833,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8914,23 +8914,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !55 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8953,7 +8953,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9003,35 +9003,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9093,23 +9093,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9181,7 +9181,7 @@ int main() { // CHECK10-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -9229,26 +9229,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9313,23 +9313,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9389,26 +9389,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9470,23 +9470,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9611,22 +9611,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9688,23 +9688,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9764,26 +9764,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9845,23 +9845,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9884,7 +9884,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9934,35 +9934,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !40 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10024,23 +10024,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10154,22 +10154,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10231,23 +10231,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10307,26 +10307,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10388,23 +10388,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// 
CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10427,7 +10427,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10477,35 +10477,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10567,23 +10567,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10655,7 +10655,7 @@ int main() { // CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -10703,26 +10703,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !13 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10787,23 +10787,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10863,26 +10863,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !23 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10944,23 +10944,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11085,22 +11085,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11162,23 +11162,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11358,7 +11358,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] 
to i8 @@ -11409,47 +11409,47 @@ int main() { // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] @@ -11465,13 +11465,13 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK11-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: @@ -11539,7 +11539,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], 
align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11561,23 +11561,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11667,7 +11667,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11689,23 +11689,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11862,22 +11862,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11939,23 +11939,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12135,7 +12135,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] 
to i8 @@ -12185,35 +12185,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12275,23 +12275,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12363,7 +12363,7 @@ int main() { // CHECK12-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK12-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -12411,26 +12411,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !13 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12495,23 +12495,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12571,26 +12571,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !23 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12652,23 +12652,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12793,22 +12793,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12870,23 +12870,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13066,7 +13066,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] 
to i8 @@ -13117,47 +13117,47 @@ int main() { // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] @@ -13173,13 +13173,13 @@ int main() { // CHECK12-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK12-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK12-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK12-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK12: omp_if.then15: @@ -13247,7 +13247,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], 
align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13269,23 +13269,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13375,7 +13375,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13397,23 +13397,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13570,22 +13570,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13647,23 +13647,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13843,7 +13843,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] 
to i8 @@ -13893,35 +13893,35 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13983,23 +13983,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: 
omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -14041,23 +14041,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14067,23 +14067,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // 
CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z9gtid_testv() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14117,23 +14117,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn4v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14143,23 +14143,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn5v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14173,23 +14173,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn6v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14225,23 +14225,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn1v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14251,23 +14251,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn2v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store 
i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14281,23 +14281,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn3v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14323,23 +14323,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14349,23 +14349,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z9gtid_testv() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14399,23 +14399,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn4v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14425,23 +14425,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn5v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14455,23 +14455,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // 
CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn6v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14507,23 +14507,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn1v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14533,23 +14533,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn2v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14563,23 +14563,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn3v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: 
store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14605,23 +14605,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14631,23 +14631,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK15-NEXT: call void @_Z9gtid_testv() +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: call void @_Z9gtid_testv(), 
!llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14681,23 +14681,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn4v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14741,23 +14741,23 @@ int main() { // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group 
!19 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn6v() +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14818,23 +14818,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn1v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14874,23 +14874,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, 
!llvm.access.group !27 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn3v() +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !27 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14916,23 +14916,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14942,23 +14942,23 @@ int main() { // CHECK16-NEXT: store i32 
[[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK16-NEXT: call void @_Z9gtid_testv() +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14992,23 +14992,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn4v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !14 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// 
CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -15052,23 +15052,23 @@ int main() { // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn6v() +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -15129,23 +15129,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn1v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !23 // CHECK16-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -15185,23 +15185,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !27 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn3v() +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !27 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp index 07d47f3a78f4..03e4bae0c9bb 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -213,11 +213,11 @@ int main() { // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // 
CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -290,40 +290,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP13]], double* [[CONV11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK1-NEXT: store double [[TMP13]], double* [[CONV11]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV12:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV13:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP17]], float* [[CONV13]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8 +// CHECK1-NEXT: store float [[TMP17]], float* [[CONV13]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* 
[[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV14:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP19]], double* [[CONV14]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK1-NEXT: store double [[TMP19]], double* [[CONV14]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -345,9 +345,9 @@ int main() { // CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 // CHECK1-NEXT: store volatile double [[TMP29]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP31]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP31]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -421,37 +421,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// 
CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR10]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G7]], double** [[TMP12]], align 8 +// CHECK1-NEXT: store double* [[G7]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -473,9 +473,9 @@ int main() { // CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 // CHECK1-NEXT: store volatile double [[TMP24]], 
double* [[TMP2]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: store float [[TMP26]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP26]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -532,11 +532,11 @@ int main() { // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -609,40 +609,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV11:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP13]], double* [[CONV11]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK2-NEXT: store double [[TMP13]], double* [[CONV11]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !4 // 
CHECK2-NEXT: [[CONV12:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV13:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK2-NEXT: store float [[TMP17]], float* [[CONV13]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8 +// CHECK2-NEXT: store float [[TMP17]], float* [[CONV13]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV14:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP19]], double* [[CONV14]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK2-NEXT: store double [[TMP19]], double* [[CONV14]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -664,9 +664,9 @@ int main() { // CHECK2-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 // CHECK2-NEXT: store volatile double [[TMP29]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP31]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP31]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -740,37 +740,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 -// CHECK2-NEXT: store i32 3, i32* [[SVAR10]], align 4 -// CHECK2-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store i32 3, i32* [[SVAR10]], align 4, !llvm.access.group !8 +// 
CHECK2-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double* [[G7]], double** [[TMP12]], align 8 +// CHECK2-NEXT: store double* [[G7]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK2-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8 +// CHECK2-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -792,9 +792,9 @@ int main() { // CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 // CHECK2-NEXT: store volatile double [[TMP24]], double* [[TMP2]], align 8 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK2-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK2-NEXT: store float [[TMP26]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP26]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -913,29 +913,29 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CONV8:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP15]], float* [[CONV8]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]) +// CHECK3-NEXT: store float [[TMP15]], float* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1030,37 +1030,37 @@ int main() { // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** 
[[_TMP4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4 +// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1203,29 +1203,29 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// 
CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CONV8:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK4-NEXT: store float [[TMP15]], float* [[CONV8]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]) +// CHECK4-NEXT: store float [[TMP15]], float* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]), !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1320,37 +1320,37 @@ int main() { // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4 -// CHECK4-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK4-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: 
[[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double* [[G2]], double** [[TMP14]], align 4 +// CHECK4-NEXT: store double* [[G2]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK4-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4 +// CHECK4-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1540,12 +1540,12 @@ int main() { // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to 
i32* // CHECK5-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1627,34 +1627,34 @@ int main() { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[CONV10:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]) +// CHECK5-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]), !llvm.access.group !5 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -1673,7 +1673,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) @@ -1698,7 +1698,7 @@ int main() { // CHECK5-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -1797,37 +1797,37 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC6]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i64 0, i64 [[IDXPROM12]] // CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX13]] to i8* // CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -1846,7 +1846,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -1871,7 +1871,7 @@ int main() { // CHECK5-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK5-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 4 // CHECK5-NEXT: 
br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -2062,7 +2062,7 @@ int main() { // CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2140,30 +2140,30 @@ int main() { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]) +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]), !llvm.access.group !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -2182,7 +2182,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -2300,37 +2300,37 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], 
i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i64 0, i64 [[IDXPROM10]] // CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -2349,7 +2349,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -2593,12 +2593,12 @@ int main() { // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK6-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK6-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -2680,34 
+2680,34 @@ int main() { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[CONV10:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]) +// CHECK6-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]), !llvm.access.group !5 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -2726,7 +2726,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK6-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) @@ -2751,7 +2751,7 @@ int main() { // CHECK6-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK6-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK6-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -2850,37 +2850,37 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC6]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 // CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i64 0, i64 [[IDXPROM12]] // CHECK6-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX13]] to i8* // CHECK6-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -2899,7 +2899,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -2924,7 +2924,7 @@ int main() { // CHECK6-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK6-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK6-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 4 // CHECK6-NEXT: 
br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -3115,7 +3115,7 @@ int main() { // CHECK6-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK6-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -3193,30 +3193,30 @@ int main() { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]) +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]), !llvm.access.group !14 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3235,7 +3235,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -3353,37 +3353,37 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], 
i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 // CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i64 0, i64 [[IDXPROM10]] // CHECK6-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK6-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !17 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3402,7 +3402,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -3725,30 +3725,30 @@ int main() { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, 
i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -3887,35 +3887,35 @@ int main() { // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: [[TMP18:%.*]] = 
bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -4224,27 +4224,27 @@ int main() { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]), !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -4378,35 +4378,35 @@ int main() { // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: 
[[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -4748,30 +4748,30 @@ int main() { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK8-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]) +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]), !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -4910,35 +4910,35 @@ int main() { // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[MUL:%.*]] 
= mul nsw i32 [[TMP13]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK8-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -5247,27 +5247,27 @@ int main() { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], 
align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]) +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]), !llvm.access.group !15 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -5401,35 +5401,35 @@ int main() { // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // 
CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index da7a6b1712ce..c28b90653caa 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -9127,7 +9127,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9162,7 +9162,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: 
[[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9208,7 +9208,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -9273,7 +9273,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9373,7 +9373,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9408,7 +9408,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9454,7 +9454,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // 
CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -9519,7 +9519,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9623,11 +9623,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9666,7 +9666,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9683,7 +9683,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -9714,11 +9714,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // 
CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -9809,7 +9809,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9909,7 +9909,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9944,7 +9944,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9990,7 +9990,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -10055,7 +10055,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store 
i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10155,11 +10155,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10198,7 +10198,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10244,11 +10244,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -10316,7 +10316,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ 
-10339,7 +10339,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10891,7 +10891,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10951,7 +10951,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -11011,7 +11011,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -11258,7 +11258,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11318,7 +11318,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -11378,7 +11378,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11836,7 +11836,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -11871,7 +11871,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11917,7 +11917,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -11982,7 +11982,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], 
align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12082,7 +12082,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -12117,7 +12117,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12163,7 +12163,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -12228,7 +12228,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12332,11 +12332,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load 
i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12375,7 +12375,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12392,7 +12392,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -12423,11 +12423,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -12518,7 +12518,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12618,7 +12618,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -12653,7 +12653,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12699,7 +12699,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -12764,7 +12764,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12864,11 +12864,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12907,7 +12907,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 
[[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12953,11 +12953,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -13025,7 +13025,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -13048,7 +13048,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13600,7 +13600,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -13660,7 +13660,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load 
i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -13720,7 +13720,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -13967,7 +13967,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -14027,7 +14027,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -14087,7 +14087,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -19759,7 +19759,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], 
align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19794,7 +19794,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -19840,7 +19840,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -19905,7 +19905,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20005,7 +20005,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20040,7 +20040,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20086,7 +20086,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 
[[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -20151,7 +20151,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20255,11 +20255,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20298,7 +20298,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20315,7 +20315,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -20346,11 +20346,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -20441,7 +20441,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20541,7 +20541,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20576,7 +20576,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20622,7 +20622,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -20687,7 +20687,7 @@ int main (int argc, 
char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20787,11 +20787,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20830,7 +20830,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20876,11 +20876,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -20948,7 +20948,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20971,7 +20971,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21523,7 +21523,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21583,7 +21583,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -21643,7 +21643,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -21890,7 +21890,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21950,7 +21950,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -22010,7 +22010,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -22468,7 +22468,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -22503,7 +22503,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22549,7 +22549,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -22614,7 +22614,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: 
[[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22714,7 +22714,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -22749,7 +22749,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22795,7 +22795,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -22860,7 +22860,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22964,11 +22964,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // 
CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -23007,7 +23007,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23024,7 +23024,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -23055,11 +23055,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -23150,7 +23150,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23250,7 +23250,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -23285,7 +23285,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23331,7 +23331,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -23396,7 +23396,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23496,11 +23496,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -23539,7 +23539,7 @@ int main (int argc, char **argv) 
{ // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23585,11 +23585,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -23657,7 +23657,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23680,7 +23680,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -24232,7 +24232,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: 
[[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -24292,7 +24292,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -24352,7 +24352,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -24599,7 +24599,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -24659,7 +24659,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -24719,7 +24719,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 
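Note on the test-expectation hunks in these OpenMP codegen tests: every change above and below is the same mechanical update. Loads and stores that go through a bitcast of a 64-bit by-value argument slot to a narrower type now carry the natural alignment of the accessed type (4 for i32, 2 for i16, 1 for i8) instead of the 8-byte alignment of the underlying i64 slot. A minimal IR sketch of the pattern, using a hypothetical function @example that is not part of the patch:

; A 64-bit argument slot reinterpreted as i32: the access is expected with the
; natural alignment of i32 (4), where the old checks expected "align 8".
define void @example(i64 %n) {
entry:
  %n.addr = alloca i64, align 8
  store i64 %n, i64* %n.addr, align 8
  %conv = bitcast i64* %n.addr to i32*
  %val = load i32, i32* %conv, align 4
  ret void
}

The same reasoning gives "align 2" for the i16 accesses through [[CONV]] on the AA_ADDR slot and "align 1" for the i8 accesses through [[CONV3]] on the AAA_ADDR slot in the hunks that follow.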
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 04016eb544e0..5aa6962c06de 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -629,10 +629,10 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -824,7 +824,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -899,7 +899,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void @@ -912,7 +912,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -968,11 +968,11 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1006,11 +1006,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1069,14 +1069,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1130,7 +1130,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1212,9 +1212,9 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -1625,7 +1625,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1693,7 +1693,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1751,19 +1751,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1804,9 +1804,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // 
CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1859,19 +1859,19 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !41 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -1925,11 +1925,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1991,14 +1991,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: 
store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2368,10 +2368,10 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2563,7 +2563,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2638,7 +2638,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK2-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: ret void @@ -2651,7 +2651,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2707,11 +2707,11 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2745,11 +2745,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2808,14 +2808,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2869,7 +2869,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2951,9 +2951,9 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] 
= mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -3364,7 +3364,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3432,7 +3432,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3490,19 +3490,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = 
load i64, i64* [[AAA_CASTED]], align 8 @@ -3543,9 +3543,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3598,19 +3598,19 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !41 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK2-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK2-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -3664,11 +3664,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* 
[[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3730,14 +3730,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !44 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -4103,7 +4103,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4378,7 +4378,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4434,11 +4434,11 @@ int bar(int n){ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4474,7 +4474,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4535,11 +4535,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5208,11 +5208,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5309,16 +5309,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // 
CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -5374,7 +5374,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5438,11 +5438,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5808,7 +5808,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6083,7 +6083,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6139,11 +6139,11 @@ int bar(int n){ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* 
[[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6179,7 +6179,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6240,11 +6240,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6913,11 +6913,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -7014,16 +7014,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // 
CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK4-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -7079,7 +7079,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7143,11 +7143,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7517,10 +7517,10 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK5-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load 
i64, i64* [[AA_CASTED]], align 8 @@ -7712,7 +7712,7 @@ int bar(int n){ // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7787,7 +7787,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void @@ -7800,7 +7800,7 @@ int bar(int n){ // CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7856,11 +7856,11 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK5-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7894,11 +7894,11 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7957,14 +7957,14 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8018,7 +8018,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8100,9 +8100,9 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -8539,11 +8539,11 @@ int bar(int n){ // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -8605,7 +8605,7 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -8620,7 +8620,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8656,7 +8656,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK5-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK5-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK5-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8716,19 +8716,19 @@ int bar(int n){ // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK5-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8769,9 +8769,9 @@ int bar(int n){ // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], 
align 4 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -8824,19 +8824,19 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK5-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !44 // CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -8890,11 +8890,11 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8956,14 +8956,14 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[CONV]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -9333,10 +9333,10 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK6-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9528,7 +9528,7 @@ int bar(int n){ // CHECK6-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9603,7 +9603,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK6-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: -// CHECK6-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK6: .omp.final.done: // CHECK6-NEXT: ret void @@ -9616,7 +9616,7 @@ int bar(int n){ // CHECK6-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP0]], i16* [[CONV1]], 
align 2 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9672,11 +9672,11 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK6-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK6-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK6-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK6-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9710,11 +9710,11 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9773,14 +9773,14 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9834,7 +9834,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK6-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK6-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* 
[[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9916,9 +9916,9 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -10355,11 +10355,11 @@ int bar(int n){ // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK6-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10421,7 +10421,7 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -10436,7 +10436,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK6-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10472,7 +10472,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK6-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK6-NEXT: 
[[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK6-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10532,19 +10532,19 @@ int bar(int n){ // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK6-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10585,9 +10585,9 @@ int bar(int n){ // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -10640,19 +10640,19 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK6-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !44 // CHECK6-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK6-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK6-NEXT: store i16 
[[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK6-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK6-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK6-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK6-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -10706,11 +10706,11 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10772,14 +10772,14 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !47 -// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -11145,7 +11145,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK7-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 
[[TMP2]]) -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11420,7 +11420,7 @@ int bar(int n){ // CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11476,11 +11476,11 @@ int bar(int n){ // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK7-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11516,7 +11516,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11577,11 +11577,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12151,7 +12151,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// 
CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -12212,7 +12212,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: @@ -12327,11 +12327,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK7-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -12428,16 +12428,16 @@ int bar(int n){ // CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK7-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK7-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -12493,7 +12493,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], 
align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12557,11 +12557,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12927,7 +12927,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK8-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13202,7 +13202,7 @@ int bar(int n){ // CHECK8-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13258,11 +13258,11 @@ int bar(int n){ // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK8-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK8-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK8-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK8-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13298,7 +13298,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], 
align 4 // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13359,11 +13359,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13933,7 +13933,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK8-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -13994,7 +13994,7 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK8-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK8: omp_if.then: @@ -14109,11 +14109,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK8-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -14210,16 +14210,16 @@ int bar(int n){ // CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK8-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 
4, !llvm.access.group !45 -// CHECK8-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK8-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK8-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !45 -// CHECK8-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK8-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK8-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK8-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK8-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -14275,7 +14275,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14339,11 +14339,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 -// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18655,10 +18655,10 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* 
@[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18743,7 +18743,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18799,11 +18799,11 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18837,11 +18837,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18900,14 +18900,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // 
CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18961,7 +18961,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -19043,9 +19043,9 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -19123,19 +19123,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* 
[[AAA_CASTED]], align 8 @@ -19176,9 +19176,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -19231,19 +19231,19 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -19302,7 +19302,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -19370,7 +19370,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -19420,11 +19420,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19486,14 +19486,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -19534,10 +19534,10 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* 
[[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19622,7 +19622,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19678,11 +19678,11 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19716,11 +19716,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19779,14 +19779,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 
[[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19840,7 +19840,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -19922,9 +19922,9 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -20002,19 +20002,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -20055,9 +20055,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* 
[[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -20110,19 +20110,19 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK18-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK18-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK18-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK18-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -20181,7 +20181,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -20249,7 +20249,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK18-NEXT: [[TMP12:%.*]] = 
load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -20299,11 +20299,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -20365,14 +20365,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -20414,7 +20414,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20499,7 +20499,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // 
CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20555,11 +20555,11 @@ int bar(int n){ // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20595,7 +20595,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20656,11 +20656,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20877,11 +20877,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* 
[[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -20978,16 +20978,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK19-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK19-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -21163,7 +21163,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21227,11 +21227,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -21273,7 +21273,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21358,7 +21358,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21414,11 +21414,11 @@ int bar(int n){ // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21454,7 +21454,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21515,11 +21515,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21736,11 +21736,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 
// CHECK20-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -21837,16 +21837,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK20-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK20-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK20-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK20-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -22022,7 +22022,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -22086,11 +22086,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // 
CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -22131,10 +22131,10 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK21-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22219,7 +22219,7 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22275,11 +22275,11 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK21-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22313,11 +22313,11 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// 
CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22376,14 +22376,14 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22437,7 +22437,7 @@ int bar(int n){ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -22519,9 +22519,9 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -22599,19 +22599,19 @@ int bar(int n){ // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store 
i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK21-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK21-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -22652,9 +22652,9 @@ int bar(int n){ // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -22707,19 +22707,19 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK21-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK21-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 
+// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -22782,11 +22782,11 @@ int bar(int n){ // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -22848,7 +22848,7 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -22863,7 +22863,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK21-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22899,7 +22899,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK21-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK21-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK21-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22951,11 +22951,11 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 
4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23017,14 +23017,14 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -23065,10 +23065,10 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK22-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23153,7 +23153,7 @@ int bar(int n){ // CHECK22-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK22-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23209,11 +23209,11 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 
[[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK22-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK22-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK22-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK22-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK22-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK22-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23247,11 +23247,11 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23310,14 +23310,14 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23371,7 +23371,7 @@ int bar(int n){ // CHECK22-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK22-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK22-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -23453,9 +23453,9 @@ int bar(int n){ // 
CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK22-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK22-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -23533,19 +23533,19 @@ int bar(int n){ // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK22-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK22-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK22-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -23586,9 +23586,9 @@ int bar(int n){ // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK22-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -23641,19 +23641,19 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK22-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK22-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* 
[[CONV]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK22-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK22-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK22-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK22-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK22-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK22-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK22-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK22-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK22-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -23716,11 +23716,11 @@ int bar(int n){ // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK22-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK22-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -23782,7 +23782,7 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK22-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -23797,7 +23797,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, 
!llvm.access.group !30 // CHECK22-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK22-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -23833,7 +23833,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK22-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK22-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK22-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -23885,11 +23885,11 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23951,14 +23951,14 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !35 -// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !35 +// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -24000,7 +24000,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK23-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP3:%.*]] = 
load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24085,7 +24085,7 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24141,11 +24141,11 @@ int bar(int n){ // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK23-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -24181,7 +24181,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24242,11 +24242,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -24463,11 +24463,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], 
align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK23-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -24564,16 +24564,16 @@ int bar(int n){ // CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK23-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK23-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -24638,7 +24638,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -24699,7 +24699,7 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -24804,7 +24804,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// 
CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24868,11 +24868,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -24914,7 +24914,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK24-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24999,7 +24999,7 @@ int bar(int n){ // CHECK24-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK24-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK24-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -25055,11 +25055,11 @@ int bar(int n){ // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK24-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK24-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK24-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK24-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -25095,7 +25095,7 @@ int bar(int n){ // CHECK24-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -25156,11 +25156,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -25377,11 +25377,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK24-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK24-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -25478,16 +25478,16 @@ int bar(int n){ // CHECK24-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK24-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK24-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK24-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK24-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK24-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK24-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK24-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK24-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK24-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK24-NEXT: store i8 
[[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -25552,7 +25552,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK24-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -25613,7 +25613,7 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK24-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -25718,7 +25718,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -25782,11 +25782,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp index 89a4e1d65a4f..6e3c19d4421f 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -1132,11 +1132,11 @@ int main (int argc, char **argv) { // 
CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1180,9 +1180,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1602,11 +1602,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1650,9 +1650,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp index 
1a8ace57e558..fb0cb638f50c 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -2333,7 +2333,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2367,7 +2367,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2461,7 +2461,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2495,7 +2495,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2593,11 +2593,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2634,7 +2634,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: 
[[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2651,7 +2651,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -3335,7 +3335,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3369,7 +3369,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3463,7 +3463,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3497,7 +3497,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3595,11 +3595,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3636,7 +3636,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3653,7 +3653,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp index ce209edf36ac..138ff30b199a 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -345,11 +345,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* 
[[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -446,7 +446,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -458,9 +458,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -701,7 +701,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -801,7 +801,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1149,11 +1149,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, 
i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1250,7 +1250,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1262,9 +1262,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1505,7 +1505,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1605,7 +1605,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -4759,7 +4759,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4768,7 +4768,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4833,10 +4833,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5023,7 +5023,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5032,7 +5032,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5097,10 +5097,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: 
store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp index db176f546888..f62acbff4b39 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -182,11 +182,11 @@ int main() { // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -305,9 +305,9 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP24]], float* [[CONV3]], align 8 +// CHECK1-NEXT: store float [[TMP24]], float* [[CONV3]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -368,11 +368,11 @@ int main() { // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -491,9 +491,9 @@ int main() { // CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK2-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], 
align 8 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP24]], float* [[CONV3]], align 8 +// CHECK2-NEXT: store float [[TMP24]], float* [[CONV3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -1077,12 +1077,12 @@ int main() { // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1211,7 +1211,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1236,7 +1236,7 @@ int main() { // CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] @@ -1427,7 +1427,7 @@ int main() { // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1553,7 +1553,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// 
CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1797,12 +1797,12 @@ int main() { // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1931,7 +1931,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1956,7 +1956,7 @@ int main() { // CHECK10-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK10-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK10-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK10: .omp.lastprivate.done: // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] @@ -2147,7 +2147,7 @@ int main() { // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2273,7 +2273,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP25]], i32* 
[[CONV]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp index 5122779d1ddb..2508c7468749 100644 --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -450,9 +450,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -473,7 +473,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -515,7 +515,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -538,7 +538,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -588,14 +588,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -616,11 +616,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -907,9 +907,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -930,7 +930,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 
0, i32 0 @@ -972,7 +972,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -995,7 +995,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1045,14 +1045,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1073,11 +1073,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void 
// // @@ -1490,13 +1490,13 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1516,7 +1516,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1933,13 +1933,13 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1959,7 +1959,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1981,7 +1981,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to 
i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2004,7 +2004,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2034,9 +2034,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2057,7 +2057,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2126,14 +2126,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // 
CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2154,11 +2154,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2169,7 +2169,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2192,7 +2192,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2222,9 +2222,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2245,7 +2245,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2314,14 +2314,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2342,11 +2342,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store 
i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2495,13 +2495,13 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2521,7 +2521,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2674,13 +2674,13 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2700,7 +2700,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2984,9 +2984,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // 
CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3007,7 +3007,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3049,7 +3049,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3072,7 +3072,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3122,14 +3122,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3150,11 +3150,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3441,9 +3441,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3464,7 +3464,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3506,7 +3506,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3529,7 +3529,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3579,14 +3579,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3607,11 +3607,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 
[[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4024,13 +4024,13 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4050,7 +4050,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4467,13 +4467,13 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4493,7 +4493,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4515,7 +4515,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4538,7 +4538,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4568,9 +4568,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4591,7 +4591,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4660,14 +4660,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK25-NEXT: [[TMP3:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4688,11 +4688,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -4703,7 +4703,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4726,7 +4726,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4756,9 +4756,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4779,7 +4779,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4848,14 +4848,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4876,11 +4876,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store 
i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5029,13 +5029,13 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5055,7 +5055,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5208,13 +5208,13 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5234,11 +5234,10 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp 
b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp index 51663d0feb22..860b6ee596dc 100644 --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -468,9 +468,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -491,7 +491,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -536,8 +536,8 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -560,7 +560,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -610,14 +610,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -638,11 +638,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -947,9 +947,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -970,7 +970,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], 
i32 0, i32 0 @@ -1015,8 +1015,8 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1039,7 +1039,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1089,14 +1089,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1117,11 +1117,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// 
CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1554,13 +1554,13 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1580,7 +1580,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2017,13 +2017,13 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2043,7 +2043,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2068,8 +2068,8 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK9-NEXT: 
[[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2092,7 +2092,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2122,9 +2122,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2145,7 +2145,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2214,14 +2214,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 
[[TMP2]], i32 1024) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2242,11 +2242,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2260,8 +2260,8 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2284,7 +2284,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2314,9 +2314,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2337,7 +2337,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2406,14 +2406,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2434,11 +2434,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: 
store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2590,13 +2590,13 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2616,7 +2616,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2772,13 +2772,13 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2798,7 +2798,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -3100,9 +3100,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] 
to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3123,7 +3123,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3168,8 +3168,8 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3192,7 +3192,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3242,14 +3242,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3270,11 +3270,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3579,9 +3579,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3602,7 +3602,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3647,8 +3647,8 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3671,7 +3671,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3721,14 +3721,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3749,11 +3749,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4186,13 +4186,13 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4212,7 +4212,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4649,13 +4649,13 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4675,7 +4675,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 
[[CONV1]] @@ -4700,8 +4700,8 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4724,7 +4724,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4754,9 +4754,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4777,7 +4777,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4846,14 +4846,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: 
[[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4874,11 +4874,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -4892,8 +4892,8 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4916,7 +4916,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4946,9 +4946,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4969,7 +4969,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5038,14 +5038,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5066,11 +5066,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: 
store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5222,13 +5222,13 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5248,7 +5248,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5404,13 +5404,13 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5430,11 +5430,10 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_codegen.cpp 
b/clang/test/OpenMP/teams_codegen.cpp index 37a87fc25003..40a815b8d352 100644 --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -638,7 +638,7 @@ void foo() { // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK1-NEXT: ret void @@ -670,7 +670,7 @@ void foo() { // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK1-NEXT: ret void @@ -710,11 +710,11 @@ void foo() { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[LC_ADDR]] to float* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[GBLB_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i64 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP3]], [[CONV4]] // CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD5]] to i32 @@ -1130,7 +1130,7 @@ void foo() { // CHECK2-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK2-NEXT: ret void @@ -1162,7 +1162,7 @@ void foo() { // CHECK2-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK2-NEXT: ret void @@ -1202,11 +1202,11 @@ void foo() { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[LC_ADDR]] to float* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[GBLB_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i64 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP3]], [[CONV4]] // CHECK2-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD5]] to i32 @@ -3849,8 +3849,8 @@ void foo() { // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK33-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK33-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]]) // CHECK33-NEXT: ret void @@ -3882,8 +3882,8 @@ void foo() { // CHECK33-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK33-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) // CHECK33-NEXT: ret void @@ -3916,8 +3916,8 @@ void foo() { // CHECK34-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK34-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK34-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK34-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK34-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]]) // CHECK34-NEXT: ret void @@ -3949,8 +3949,8 @@ void foo() { // CHECK34-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK34-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK34-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK34-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK34-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) // CHECK34-NEXT: ret void @@ -4161,4 +4161,3 @@ void foo() { // CHECK44-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK44-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp index d2e99882fd11..24a4329d096b 100644 --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -328,8 +328,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -692,8 +692,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -3202,8 +3202,8 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK25-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK25-NEXT: ret void @@ -3562,8 +3562,8 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK26-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK26-NEXT: ret void @@ -4346,4 +4346,3 @@ int main (int argc, char **argv) { // CHECK28-NEXT: call void @__tgt_register_requires(i64 1) // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp index a16577e47b56..4737bdf9885d 100644 --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -1969,7 +1969,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -2025,7 +2025,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -2881,7 +2881,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -2937,7 +2937,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp index 
79626ef5333f..d428cf999b1e 100644 --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -348,11 +348,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -449,7 +449,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -461,9 +461,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -699,7 +699,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -799,7 +799,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], 
[2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1140,11 +1140,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1241,7 +1241,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1253,9 +1253,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1491,7 +1491,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1591,7 +1591,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -3418,7 +3418,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = 
bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3427,7 +3427,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3492,10 +3492,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -3675,7 +3675,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3684,7 +3684,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3749,10 +3749,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], 
align 4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -3791,4 +3791,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp index c31965a4c645..61d18a0eb087 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -327,8 +327,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -901,8 +901,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -4806,8 +4806,8 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK25-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK25-NEXT: ret void @@ -5339,8 +5339,8 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK26-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK26-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp index 025ea7585edd..2be4e769c21d 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -3067,7 +3067,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3124,7 +3124,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -3155,7 +3155,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3710,7 +3710,7 @@ int main (int argc, char **argv) 
{ // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3745,7 +3745,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -3772,7 +3772,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4577,7 +4577,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -4634,7 +4634,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -4665,7 +4665,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 
-// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -5220,7 +5220,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -5255,7 +5255,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -5282,7 +5282,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8336,4 +8336,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp index cb8e73661ba8..bf2459ced3e8 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -382,11 +382,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -485,11 +485,11 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -653,7 +653,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -665,9 +665,9 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -870,7 +870,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -971,7 +971,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // 
CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1118,7 +1118,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1446,11 +1446,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1549,11 +1549,11 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1717,7 +1717,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1729,9 +1729,9 @@ int main() { // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: 
[[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1934,7 +1934,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2035,7 +2035,7 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2182,7 +2182,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -4512,7 +4512,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4521,7 +4521,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4588,7 +4588,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: 
[[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4597,7 +4597,7 @@ int main() { // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4683,10 +4683,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -4866,7 +4866,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4875,7 +4875,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4942,7 +4942,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4951,7 +4951,7 @@ int main() { // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, 
i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5037,10 +5037,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5079,4 +5079,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp index 77700887dab5..a52665e07e52 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -769,7 +769,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -832,7 +832,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1281,7 +1281,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -1344,7 +1344,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = 
load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -2106,7 +2106,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2169,7 +2169,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -2618,7 +2618,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2681,7 +2681,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -3443,7 +3443,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -3506,7 +3506,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -3955,7 +3955,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[ARG]], 
i64* [[ARG_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -4018,7 +4018,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -4780,7 +4780,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK6-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -4843,7 +4843,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -5292,7 +5292,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK6-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -5355,7 +5355,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -6117,7 +6117,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // 
CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -6180,7 +6180,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -6629,7 +6629,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -6692,7 +6692,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -7454,7 +7454,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -7517,7 +7517,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -7966,7 +7966,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8029,7 +8029,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: 
[[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -8791,7 +8791,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK13-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8854,7 +8854,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -9303,7 +9303,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK13-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -9366,7 +9366,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -10128,7 +10128,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK14-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -10191,7 +10191,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: @@ 
-10640,7 +10640,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK14-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -10703,7 +10703,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index 0b8ebc471a9e..0e06a7cfa116 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -346,7 +346,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -400,7 +400,7 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1117,7 +1117,7 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1498,7 +1498,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // 
CHECK2-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -1552,7 +1552,7 @@ int main() { // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2269,7 +2269,7 @@ int main() { // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2650,7 +2650,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -2704,7 +2704,7 @@ int main() { // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3412,7 +3412,7 @@ int main() { // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3802,7 +3802,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: store i8 [[TMP0]], i8* 
[[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -3856,7 +3856,7 @@ int main() { // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4564,7 +4564,7 @@ int main() { // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4680,4 +4680,3 @@ int main() { // CHECK6-NEXT: call void @__tgt_register_requires(i64 1) // CHECK6-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index b1a46f0dfbb5..0ea08bb4d412 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -8233,7 +8233,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -8290,7 +8290,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -8321,7 +8321,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 
8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8700,7 +8700,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -8786,7 +8786,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8865,7 +8865,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9372,7 +9372,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -9434,7 +9434,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9487,7 +9487,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* 
[[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -9714,7 +9714,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -9776,7 +9776,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9829,7 +9829,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10702,7 +10702,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -10759,7 +10759,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -10790,7 +10790,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11169,7 +11169,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -11255,7 +11255,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11334,7 +11334,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11841,7 +11841,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -11903,7 +11903,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11956,7 +11956,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -12183,7 +12183,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -12245,7 +12245,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12298,7 +12298,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17945,7 +17945,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = 
load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18002,7 +18002,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -18033,7 +18033,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18412,7 +18412,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18498,7 +18498,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18577,7 +18577,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -19084,7 +19084,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // 
CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -19146,7 +19146,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19199,7 +19199,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -19426,7 +19426,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -19488,7 +19488,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19541,7 +19541,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, 
i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20414,7 +20414,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -20471,7 +20471,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -20502,7 +20502,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20881,7 +20881,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -20967,7 +20967,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21046,7 +21046,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: 
store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21553,7 +21553,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -21615,7 +21615,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21668,7 +21668,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -21895,7 +21895,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -21957,7 +21957,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], 
i32* [[CONV2]], align 4
// CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
@@ -22010,7 +22010,7 @@ int main (int argc, char **argv) {
// CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4
// CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8
+// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK18-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 4a89c664740c..4fa27a341fc5 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -347,8 +347,8 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32*
// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32*
// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4
// CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]])
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]])
// CHECK1-NEXT: ret void
@@ -414,23 +414,23 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5
// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -524,27 +524,27 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], 
align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -665,7 +665,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -791,7 +791,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -994,8 +994,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -1061,23 +1061,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1171,27 +1171,27 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1312,7 +1312,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1438,7 +1438,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1700,21 +1700,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load 
i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1806,26 +1806,26 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1942,7 +1942,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2065,7 +2065,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2327,21 +2327,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2433,26 +2433,26 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2569,7 +2569,7 @@ int main (int 
argc, char **argv) { // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2692,7 +2692,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3467,7 +3467,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3595,7 +3595,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3834,7 +3834,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3962,7 +3962,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4195,7 +4195,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4320,7 +4320,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4553,7 +4553,7 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -4678,7 +4678,7 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5174,7 +5174,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -5270,7 +5270,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -5429,7 +5429,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -5525,7 +5525,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK18-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -5680,7 +5680,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -5773,7 +5773,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -5928,7 +5928,7 @@ int main (int argc, char **argv) { // CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -6021,7 +6021,7 @@ int main (int argc, char **argv) { // CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -6509,7 +6509,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -6637,7 +6637,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK25-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -6742,8 +6742,8 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK25-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 
8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK25-NEXT: ret void @@ -6788,23 +6788,23 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !11 // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -6868,27 +6868,27 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -7120,7 +7120,7 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7248,7 +7248,7 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK26-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK26-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7353,8 +7353,8 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK26-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK26-NEXT: ret void @@ -7399,23 +7399,23 @@ int main (int argc, char **argv) { // CHECK26-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK26-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK26: omp.inner.for.cond: -// CHECK26-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK26-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK26-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK26: omp.inner.for.body: -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !11 // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK26: omp.inner.for.inc: -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK26-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7479,27 +7479,27 @@ int main (int argc, char **argv) { // CHECK26-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK26-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK26: omp.inner.for.cond: -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK26-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK26: omp.inner.for.body: -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK26-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK26: omp.inner.for.inc: -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// 
CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7725,7 +7725,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -7850,7 +7850,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK27-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -7997,21 +7997,21 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !12 // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -8073,26 +8073,26 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -8318,7 +8318,7 
@@ int main (int argc, char **argv) { // CHECK28-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK28-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK28: omp.loop.exit: @@ -8443,7 +8443,7 @@ int main (int argc, char **argv) { // CHECK28-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK28-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK28-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK28: omp.loop.exit: @@ -8590,21 +8590,21 @@ int main (int argc, char **argv) { // CHECK28-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK28-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK28: omp.inner.for.cond: -// CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK28-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK28-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK28: omp.inner.for.body: -// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK28-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !12
// CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK28: omp.inner.for.inc:
-// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
// CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK28-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
-// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK28-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
+// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK28: omp.inner.for.end:
// CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK28: omp.loop.exit:
@@ -8666,26 +8666,26 @@ int main (int argc, char **argv) {
// CHECK28-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK28: omp.inner.for.cond:
-// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
+// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16
// CHECK28-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK28-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK28: omp.inner.for.body:
-// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
// CHECK28-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4
-// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4
+// CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16
+// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16
// CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]]
-// CHECK28-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
+// CHECK28-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16
// CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK28: omp.body.continue:
// CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK28: omp.inner.for.inc:
-// CHECK28-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK28-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
// CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK28-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4
-// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
+// CHECK28-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
+// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK28: omp.inner.for.end:
// CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK28: omp.loop.exit:
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 0d8917b3a4e2..16d84deb0e2e 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -284,23 +284,23 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6
// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6
// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]])
+// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -364,28 +364,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -449,23 +449,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -529,28 +529,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: 
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -614,43 +614,43 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ 
-714,28 +714,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -895,23 +895,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -975,28 +975,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1060,23 +1060,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1140,28 +1140,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // 
CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1225,43 +1225,43 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store 
i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1325,28 +1325,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1506,21 +1506,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1582,27 +1582,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1666,21 +1666,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1742,27 +1742,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1826,41 +1826,41 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1922,27 
+1922,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2102,21 +2102,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2178,27 +2178,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2262,21 +2262,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2338,27 +2338,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2422,41 +2422,41 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
!22 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK4: cond.true5: // CHECK4-NEXT: br label [[COND_END7:%.*]] // CHECK4: cond.false6: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: br label [[COND_END7]] // CHECK4: cond.end7: // CHECK4-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2518,27 +2518,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3370,23 +3370,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3483,27 +3483,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -3606,23 +3606,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3719,27 +3719,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: 
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3780,7 +3780,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3837,7 +3837,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -3858,51 +3858,51 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, 
!llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP28:%.*]] = 
load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK9: cond.true12: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END14:%.*]] // CHECK9: cond.false13: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END14]] // CHECK9: cond.end14: // CHECK9-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4002,27 +4002,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4185,23 +4185,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4265,27 +4265,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ 
-4349,23 +4349,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4429,27 +4429,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4475,7 +4475,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -4510,7 +4510,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -4529,47 +4529,47 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
!42 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK9: cond.true6: // CHECK9-NEXT: br label [[COND_END8:%.*]] // CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[COND_END8]] // CHECK9: cond.end8: // CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4636,27 +4636,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 
+// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4994,23 +4994,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5107,27 +5107,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5230,23 +5230,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5343,27 +5343,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5404,7 +5404,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -5461,7 +5461,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -5482,51 +5482,51 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK10: cond.true12: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END14:%.*]] // CHECK10: cond.false13: -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END14]] // CHECK10: cond.end14: // CHECK10-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK10-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5626,27 +5626,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5809,23 +5809,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: 
br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5889,27 +5889,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5973,23 +5973,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6053,27 +6053,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6099,7 +6099,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -6134,7 +6134,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -6153,47 +6153,47 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK10: cond.true6: // CHECK10-NEXT: br label [[COND_END8:%.*]] // CHECK10: cond.false7: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[COND_END8]] // CHECK10: cond.end8: // CHECK10-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6260,27 +6260,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6615,21 +6615,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6724,26 +6724,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6845,21 +6845,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6954,26 +6954,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: 
@@ -7088,48 +7088,48 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: // CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7226,26 +7226,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7407,21 +7407,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7483,26 +7483,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7566,21 +7566,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7642,26 +7642,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7738,44 
+7738,44 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: // CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7839,26 +7839,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -8193,21 +8193,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8302,26 +8302,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8423,21 +8423,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8532,26 +8532,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: 
@@ -8666,48 +8666,48 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK12: cond.true11: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13:%.*]] // CHECK12: cond.false12: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13]] // CHECK12: cond.end13: // CHECK12-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8804,26 +8804,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8985,21 +8985,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9061,26 +9061,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK12-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9144,21 +9144,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9220,26 +9220,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9316,44 
+9316,44 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK12: cond.true5: // CHECK12-NEXT: br label [[COND_END7:%.*]] // CHECK12: cond.false6: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[COND_END7]] // CHECK12: cond.end7: // CHECK12-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9417,26 +9417,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 80291b5a3aa6..b0a0e22909a8 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -385,11 +385,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // 
CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -477,33 +477,33 @@ int main() { // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -652,40 +652,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK1-NEXT: [[TMP21:%.*]] = 
bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -887,7 +887,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -977,30 +977,30 @@ int main() { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] 
= load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1131,37 +1131,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1477,11 +1477,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1569,33 +1569,33 @@ int main() { // CHECK2-NEXT: store i32 [[TMP11]], i32* 
[[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1744,40 +1744,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK2-NEXT: [[TMP21:%.*]] 
= bitcast %struct.S* [[VAR7]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1979,7 +1979,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2069,30 +2069,30 @@ int main() { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: 
[[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2223,37 +2223,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2653,29 +2653,29 @@ int main() { // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* 
[[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2820,38 +2820,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3139,27 +3139,27 @@ int main() { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3287,35 +3287,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: 
[[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3715,29 +3715,29 @@ int main() { // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3882,38 +3882,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4201,27 +4201,27 @@ int main() { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// 
CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4349,35 +4349,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK4-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK4-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5935,7 +5935,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5944,7 +5944,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6002,36 +6002,36 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -6104,35 +6104,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: 
store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -6303,7 +6303,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -6312,7 +6312,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6370,36 +6370,36 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK10-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6472,35 +6472,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, 
!llvm.access.group !8 // CHECK10-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp index 265e94c21ed3..45182c712032 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -177,22 +177,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -254,22 +254,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -329,26 +329,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -410,23 +410,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -548,22 +548,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -625,23 +625,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -701,26 +701,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -782,23 +782,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -822,7 +822,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -876,35 +876,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -966,23 +966,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1102,22 +1102,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1179,23 +1179,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1255,26 +1255,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1336,23 +1336,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1376,7 +1376,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -1430,35 +1430,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1520,23 +1520,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1626,22 +1626,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1703,22 +1703,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1778,26 +1778,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1859,23 +1859,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK2-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1997,22 +1997,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2074,23 +2074,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2150,26 +2150,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2231,23 +2231,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2271,7 +2271,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2325,35 +2325,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2415,23 +2415,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2551,22 +2551,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2628,23 +2628,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2704,26 +2704,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2785,23 +2785,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2825,7 +2825,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2879,35 +2879,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2969,23 +2969,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3075,22 +3075,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3152,22 +3152,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3227,26 +3227,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3308,23 +3308,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK3-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3446,22 +3446,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3523,23 +3523,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3720,7 +3720,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -3775,47 +3775,47 @@ int main() { // CHECK3-NEXT: store i32 [[COND]], i32* 
[[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] @@ -3831,13 +3831,13 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: @@ -3905,7 +3905,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, 
i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -3927,23 +3927,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4033,7 +4033,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4055,23 +4055,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4234,22 +4234,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4311,23 +4311,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4508,7 +4508,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -4562,35 +4562,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4652,23 +4652,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4758,22 +4758,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call 
void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4835,22 +4835,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4910,26 +4910,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !20 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4991,23 +4991,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5129,22 +5129,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5206,23 +5206,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5403,7 +5403,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -5458,47 +5458,47 @@ int main() { // CHECK4-NEXT: store i32 [[COND]], i32* 
[[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] @@ -5514,13 +5514,13 @@ int main() { // CHECK4-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK4-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK4-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK4: omp_if.then15: @@ -5588,7 +5588,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, 
i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5610,23 +5610,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5716,7 +5716,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5738,23 +5738,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // 
CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5917,22 +5917,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5994,23 +5994,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6191,7 +6191,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -6245,35 +6245,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6335,23 +6335,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6393,22 +6393,22 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6418,23 +6418,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z9gtid_testv() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6468,23 +6468,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn4v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6494,23 +6494,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn5v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6524,23 +6524,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] 
// CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn6v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6576,23 +6576,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn1v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6602,23 +6602,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn2v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6632,23 +6632,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn3v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* 
[[I20]], align 4 @@ -6674,22 +6674,22 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6699,23 +6699,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z9gtid_testv() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], 
i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6749,23 +6749,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn4v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6775,23 +6775,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn5v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6805,23 +6805,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn6v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6857,23 +6857,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void 
@_Z3fn1v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6883,23 +6883,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn2v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6913,23 +6913,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn3v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6955,22 +6955,22 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6980,23 +6980,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // 
CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK7-NEXT: call void @_Z9gtid_testv() +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7030,23 +7030,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn4v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7090,23 +7090,23 @@ int main() { // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 
-// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn6v() +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7167,23 +7167,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn1v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: 
omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7223,23 +7223,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn3v() +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7265,22 +7265,22 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// 
CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7290,23 +7290,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK8-NEXT: call void @_Z9gtid_testv() +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7340,23 +7340,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn4v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // 
CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7400,23 +7400,23 @@ int main() { // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn6v() +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7477,23 +7477,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* 
[[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn1v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7533,23 +7533,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn3v() +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7623,22 +7623,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7700,22 +7700,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7775,26 +7775,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7856,23 +7856,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7994,22 +7994,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8071,23 +8071,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8147,26 +8147,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8228,23 +8228,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !39 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8268,7 +8268,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8322,35 +8322,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8412,23 +8412,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8548,22 +8548,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8625,23 +8625,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8701,26 +8701,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8782,23 +8782,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !57 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8822,7 +8822,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8876,35 +8876,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8966,23 +8966,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9072,22 +9072,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9149,22 +9149,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9224,26 +9224,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9305,23 +9305,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9443,22 +9443,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9520,23 +9520,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9596,26 +9596,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9677,23 +9677,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9717,7 +9717,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -9771,35 +9771,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9861,23 +9861,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9997,22 +9997,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10074,23 +10074,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10150,26 +10150,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10231,23 +10231,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// 
CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10271,7 +10271,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -10325,35 +10325,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10415,23 +10415,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10521,22 +10521,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10598,22 +10598,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10673,26 +10673,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10754,23 +10754,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10892,22 +10892,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10969,23 +10969,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11166,7 +11166,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -11221,47 +11221,47 @@ int 
main() { // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] @@ -11277,13 +11277,13 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK11-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: @@ -11351,7 +11351,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], 
align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11373,23 +11373,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11479,7 +11479,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11501,23 +11501,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11680,22 +11680,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11757,23 +11757,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11954,7 +11954,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -12008,35 +12008,35 @@ int 
main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12098,23 +12098,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12204,22 +12204,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12281,22 +12281,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12356,26 +12356,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12437,23 +12437,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12575,22 +12575,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12652,23 +12652,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12849,7 +12849,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -12904,47 +12904,47 @@ int 
main() { // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] @@ -12960,13 +12960,13 @@ int main() { // CHECK12-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK12-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK12-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK12-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK12: omp_if.then15: @@ -13034,7 +13034,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], 
align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13056,23 +13056,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13162,7 +13162,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13184,23 +13184,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13363,22 +13363,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13440,23 +13440,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13637,7 +13637,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -13691,35 +13691,35 @@ int 
main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13781,23 +13781,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: 
omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13839,22 +13839,22 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13864,23 +13864,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// 
CHECK13-NEXT: call void @_Z9gtid_testv() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13914,23 +13914,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn4v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13940,23 +13940,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // 
CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn5v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13970,23 +13970,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn6v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14022,23 +14022,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn1v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14048,23 +14048,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn2v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], 
!llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14078,23 +14078,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn3v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14120,22 +14120,22 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14145,23 +14145,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z9gtid_testv() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14195,23 +14195,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn4v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !13 +// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14221,23 +14221,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn5v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14251,23 +14251,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn6v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14303,23 +14303,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn1v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14329,23 +14329,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn2v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14359,23 +14359,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn3v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: 
omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14401,22 +14401,22 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14426,23 +14426,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK15-NEXT: call void @_Z9gtid_testv() +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw 
i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14476,23 +14476,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn4v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14536,23 +14536,23 @@ int main() { // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn6v() +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK15-NEXT: br label 
[[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14613,23 +14613,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn1v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14669,23 +14669,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn3v() +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14711,22 +14711,22 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14736,23 +14736,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], 
label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK16-NEXT: call void @_Z9gtid_testv() +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14786,23 +14786,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn4v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14846,23 +14846,23 @@ int main() { // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn6v() +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -14923,23 +14923,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn1v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14979,23 +14979,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn3v() +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 36fb55d14161..59171d40b5d9 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -229,24 +229,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -307,26 +307,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// 
CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -343,7 +343,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK1-NEXT: unreachable // // @@ -362,7 +362,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -411,26 +411,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -491,26 +491,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -527,7 +527,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK1-NEXT: unreachable // // @@ -671,24 +671,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -749,26 +749,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -785,7 +785,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK1-NEXT: unreachable // // @@ -832,24 +832,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -910,26 +910,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -946,7 +946,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK1-NEXT: unreachable // // @@ -993,24 +993,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1071,26 +1071,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1107,7 +1107,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK1-NEXT: unreachable // // @@ -1184,26 +1184,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1264,26 +1264,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1300,7 +1300,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK1-NEXT: unreachable // // @@ -1465,24 +1465,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1543,26 +1543,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1579,7 +1579,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK2-NEXT: unreachable // // @@ -1598,7 +1598,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -1647,26 +1647,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1727,26 +1727,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1763,7 +1763,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK2-NEXT: unreachable // // @@ -1907,24 +1907,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1985,26 +1985,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2021,7 +2021,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK2-NEXT: unreachable // // @@ -2068,24 +2068,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2146,26 +2146,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2182,7 +2182,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK2-NEXT: unreachable // // @@ -2229,24 +2229,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2307,26 +2307,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2343,7 +2343,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK2-NEXT: unreachable // // @@ -2420,26 +2420,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2500,26 +2500,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2536,7 +2536,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK2-NEXT: unreachable // // @@ -3399,24 +3399,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3477,26 +3477,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3513,7 +3513,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK5-NEXT: unreachable // // @@ -3532,7 +3532,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -3581,26 +3581,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3661,26 +3661,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3697,7 +3697,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK5-NEXT: unreachable // // @@ -3832,24 +3832,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3910,26 +3910,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3946,7 +3946,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK5-NEXT: unreachable // // @@ -3993,24 +3993,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4071,26 +4071,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4107,7 +4107,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK5-NEXT: unreachable // // @@ -4154,24 +4154,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4232,26 +4232,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4268,7 +4268,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK5-NEXT: unreachable // // @@ -4345,26 +4345,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4425,26 +4425,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4461,7 +4461,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK5-NEXT: unreachable // // @@ -4635,24 +4635,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4713,26 +4713,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4749,7 +4749,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK6-NEXT: unreachable // // @@ -4768,7 +4768,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -4817,26 +4817,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4897,26 +4897,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4933,7 +4933,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK6-NEXT: unreachable // // @@ -5068,24 +5068,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5146,26 +5146,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5182,7 +5182,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK6-NEXT: unreachable // // @@ -5229,24 +5229,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5307,26 +5307,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5343,7 +5343,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK6-NEXT: unreachable // // @@ -5390,24 +5390,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5468,26 +5468,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5504,7 +5504,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK6-NEXT: unreachable // // @@ -5581,26 +5581,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5661,26 +5661,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5697,7 +5697,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK6-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index c62b4386594f..7d2b99806b86 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -9621,7 +9621,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -9678,7 +9678,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -9709,7 +9709,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -10136,7 +10136,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -10222,7 +10222,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, 
i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -10313,7 +10313,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10860,7 +10860,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -10922,7 +10922,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -10982,7 +10982,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -11230,7 +11230,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 
4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -11292,7 +11292,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -11352,7 +11352,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12280,7 +12280,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -12337,7 +12337,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -12368,7 +12368,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 
4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -12795,7 +12795,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -12881,7 +12881,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -12972,7 +12972,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13519,7 +13519,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -13581,7 +13581,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -13641,7 +13641,7 @@ int main (int argc, char **argv) { // 
CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -13889,7 +13889,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -13951,7 +13951,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -14011,7 +14011,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -14939,7 +14939,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -14996,7 +14996,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// 
CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -15027,7 +15027,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -15454,7 +15454,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -15540,7 +15540,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -15631,7 +15631,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16178,7 +16178,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK15-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// 
CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -16240,7 +16240,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -16300,7 +16300,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -16548,7 +16548,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK15-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -16610,7 +16610,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -16670,7 +16670,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17598,7 +17598,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -17655,7 +17655,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -17686,7 +17686,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -18113,7 +18113,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18199,7 +18199,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, 
!llvm.access.group !40 // CHECK16-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -18290,7 +18290,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -18837,7 +18837,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK16-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18899,7 +18899,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -18959,7 +18959,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -19207,7 +19207,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK16-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -19269,7 +19269,7 @@ int main (int argc, char 
**argv) { // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -19329,7 +19329,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp index 7e95a42396ea..245bcc0af26c 100644 --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -380,8 +380,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], i32* [[CONV3]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -789,8 +789,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[CONV2]], i32* [[CONV3]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -4329,7 +4329,7 @@ int main (int argc, char **argv) { // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -4551,7 +4551,7 @@ int main (int argc, char **argv) { // CHECK22-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK22-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -4773,7 +4773,7 @@ int main (int argc, char **argv) { // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -4993,7 +4993,7 @@ int main (int argc, char **argv) { // CHECK24-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK24-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -5991,8 +5991,8 @@ int main (int argc, char **argv) { // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK33-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK33-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK33-NEXT: ret void @@ -6369,8 +6369,8 @@ int main (int argc, char **argv) { // CHECK34-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK34-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK34-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK34-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK34-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK34-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK34-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK34-NEXT: ret void @@ -7321,7 +7321,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK37-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK37-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK37-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK37-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -7399,7 +7399,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK37-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK37-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK37-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK37-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK37: omp_if.then: @@ -7552,8 +7552,8 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK37-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK37-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK37-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK37-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK37-NEXT: ret void @@ -7764,7 +7764,7 @@ int main (int argc, char **argv) { // CHECK38-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK38-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK38-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK38-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK38-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK38-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -7842,7 +7842,7 @@ int main (int argc, char **argv) { // CHECK38-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK38-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK38-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK38-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK38-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK38-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK38: omp_if.then: @@ -7995,8 +7995,8 @@ int main (int argc, char **argv) { // CHECK38-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK38-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK38-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK38-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK38-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK38-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK38-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK38-NEXT: ret void @@ -8281,7 +8281,7 @@ int main (int argc, char **argv) { // CHECK39-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK39-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK39-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK39-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK39: omp_if.then: @@ -8713,7 +8713,7 @@ int main (int argc, char **argv) { // CHECK40-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK40-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK40-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK40-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK40-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK40-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK40: omp_if.then: diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp index fbf312e5e312..ec700b36af04 100644 --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -2571,7 +2571,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -2627,7 +2627,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -3540,7 +3540,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 
// CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3596,7 +3596,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp index 9f432be90370..e04322d0eaaf 100644 --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -348,11 +348,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -438,40 +438,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -706,7 +706,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -795,37 +795,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1154,11 +1154,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1244,40 +1244,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] // CHECK2-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// 
CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1512,7 +1512,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1601,37 +1601,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP18:%.*]] = load 
%struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2042,38 +2042,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr 
inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2393,35 +2393,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = 
load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2832,38 +2832,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] // CHECK4-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK4-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3183,35 +3183,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] // CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !12 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4778,7 +4778,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4787,7 +4787,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4843,35 +4843,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group 
!4 // CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -5042,7 +5042,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5051,7 +5051,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5107,35 +5107,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 +// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8 -// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// 
CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK10-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp index 1866f264dea7..1df54320269a 100644 --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -180,7 +180,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -206,7 +206,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK1-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK1-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -245,7 +245,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -271,7 +271,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK2-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK2-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* 
[[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -578,11 +578,11 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -639,14 +639,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK9-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK9-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -711,7 +711,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1309,11 +1309,11 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x 
%struct.S]** [[S_ARR_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1370,14 +1370,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK10-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -1442,7 +1442,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -5258,4 +5258,3 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK20-NEXT: call void @__tgt_register_requires(i64 1) // CHECK20-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/Preprocessor/hexagon-predefines.c b/clang/test/Preprocessor/hexagon-predefines.c index e948d7024850..576dc4b6cb38 100644 --- a/clang/test/Preprocessor/hexagon-predefines.c +++ b/clang/test/Preprocessor/hexagon-predefines.c @@ 
-107,6 +107,17 @@ // CHECK-V68HVX-128B: #define __HVX__ 1 // CHECK-V68HVX-128B: #define __hexagon__ 1 +// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv69 \ +// RUN: -target-feature +hvxv69 -target-feature +hvx-length128b %s | FileCheck \ +// RUN: %s -check-prefix CHECK-V69HVX-128B +// CHECK-V69HVX-128B: #define __HEXAGON_ARCH__ 69 +// CHECK-V69HVX-128B: #define __HEXAGON_V69__ 1 +// CHECK-V69HVX-128B: #define __HVX_ARCH__ 69 +// CHECK-V69HVX-128B: #define __HVX_LENGTH__ 128 +// CHECK-V69HVX-128B: #define __HVX__ 1 +// CHECK-V69HVX-128B: #define __hexagon__ 1 + + // RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv67 \ // RUN: -target-feature +hvxv67 -target-feature +hvx-length128b %s | FileCheck \ // RUN: %s -check-prefix CHECK-ELF diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c index 0d1aecaa99c3..8ee64d50de38 100644 --- a/clang/test/Sema/builtins-reduction-math.c +++ b/clang/test/Sema/builtins-reduction-math.c @@ -35,3 +35,20 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) { i = __builtin_reduce_min(i); // expected-error@-1 {{1st argument must be a vector type (was 'int')}} } + +void test_builtin_reduce_xor(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_xor(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_xor(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_xor(iv, iv); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_xor(i); + // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}} + + i = __builtin_reduce_xor(v); + // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} +} diff --git a/clang/test/SemaCXX/coroutines-exp-namespace.cpp b/clang/test/SemaCXX/coroutines-exp-namespace.cpp index 6fce00c09ca6..a5ad37e338d2 100644 --- a/clang/test/SemaCXX/coroutines-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutines-exp-namespace.cpp @@ -978,19 +978,6 @@ extern "C" int f(mismatch_gro_type_tag4) { co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} } -struct bad_promise_no_return_func { // expected-note {{'bad_promise_no_return_func' defined here}} - coro get_return_object(); - suspend_always initial_suspend(); - suspend_always final_suspend() noexcept; - void unhandled_exception(); -}; -// FIXME: The PDTS currently specifies this as UB, technically forbidding a -// diagnostic. 
-coro no_return_value_or_return_void() {
-  // expected-error@-1 {{'bad_promise_no_return_func' must declare either 'return_value' or 'return_void'}}
-  co_await a;
-}
-
 struct bad_await_suspend_return {
   bool await_ready();
   // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'char')}}
diff --git a/clang/test/SemaCXX/coroutines.cpp b/clang/test/SemaCXX/coroutines.cpp
index e175e3f9fa64..b461c881355b 100644
--- a/clang/test/SemaCXX/coroutines.cpp
+++ b/clang/test/SemaCXX/coroutines.cpp
@@ -970,19 +970,36 @@ extern "C" int f(mismatch_gro_type_tag4) {
   co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}}
 }
 
-struct bad_promise_no_return_func { // expected-note {{'bad_promise_no_return_func' defined here}}
-  coro get_return_object();
+struct promise_no_return_func {
+  coro get_return_object();
   suspend_always initial_suspend();
   suspend_always final_suspend() noexcept;
   void unhandled_exception();
 };
-// FIXME: The PDTS currently specifies this as UB, technically forbidding a
-// diagnostic.
-coro no_return_value_or_return_void() {
-  // expected-error@-1 {{'bad_promise_no_return_func' must declare either 'return_value' or 'return_void'}}
+// [dcl.fct.def.coroutine]/p6
+// If searches for the names return_void and return_value in the scope of
+// the promise type each find any declarations, the program is ill-formed.
+// [Note 1: If return_void is found, flowing off the end of a coroutine is
+// equivalent to a co_return with no operand. Otherwise, flowing off the end
+// of a coroutine results in undefined behavior ([stmt.return.coroutine]). —
+// end note]
+//
+// So the program isn't ill-formed if the promise defines neither return_value
+// nor return_void; it is just potential UB.
+coro no_return_value_or_return_void() {
   co_await a;
 }
 
+// The following two tests check that we emit the correct diagnostic message
+// if we co_return in `promise_no_return_func`.
+coro no_return_value_or_return_void_2() { + co_return; // expected-error {{no member named 'return_void'}} +} + +coro no_return_value_or_return_void_3() { + co_return 43; // expected-error {{no member named 'return_value'}} +} + struct bad_await_suspend_return { bool await_ready(); // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'char')}} diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected index 03e0f8ef60e0..7cc6b0dae4d4 100644 --- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected +++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected @@ -32,7 +32,7 @@ void foo(int a) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-NEXT: call void @use(i32 [[TMP0]]) // CHECK-NEXT: ret void // diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp index bf694e575a9d..aa38b5e4aa26 100644 --- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp +++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp @@ -227,6 +227,4 @@ void clang_fuzzer::HandleLLVM(const std::string &IR, if (memcmp(OptArrays, UnoptArrays, kTotalSize)) ErrorAndExit("!!!BUG!!!"); - - return; } diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index c9129ee9e502..a7bfb07e002b 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -120,7 +120,7 @@ static void ApplyOneQAOverride(raw_ostream &OS, OS << "### Adding argument " << Str << " at end\n"; Args.push_back(Str); } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { + Edit.slice(2, Edit.size() - 1).contains('/')) { StringRef MatchPattern = Edit.substr(2).split('/').first; StringRef ReplPattern = Edit.substr(2).split('/').second; ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index c59c5709b6fa..4722bece7a1d 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2275,6 +2275,8 @@ void OMPClauseEnqueue::VisitOMPUpdateClause(const OMPUpdateClause *) {} void OMPClauseEnqueue::VisitOMPCaptureClause(const OMPCaptureClause *) {} +void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} + void OMPClauseEnqueue::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseEnqueue::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a787e26ccd5a..374f3865acc3 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2504,6 +2504,7 @@ TEST_F(FormatTest, ShortEnums) { FormatStyle Style = getLLVMStyle(); Style.AllowShortEnumsOnASingleLine = true; verifyFormat("enum { A, B, C } ShortEnum1, ShortEnum2;", Style); + verifyFormat("typedef enum { A, B, C } ShortEnum1, ShortEnum2;", Style); Style.AllowShortEnumsOnASingleLine = false; verifyFormat("enum {\n" " A,\n" @@ -2511,6 +2512,20 @@ TEST_F(FormatTest, ShortEnums) { " C\n" "} ShortEnum1, ShortEnum2;", 
Style); + verifyFormat("typedef enum {\n" + " A,\n" + " B,\n" + " C\n" + "} ShortEnum1, ShortEnum2;", + Style); + verifyFormat("enum {\n" + " A,\n" + "} ShortEnum1, ShortEnum2;", + Style); + verifyFormat("typedef enum {\n" + " A,\n" + "} ShortEnum1, ShortEnum2;", + Style); Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.AfterEnum = true; verifyFormat("enum\n" @@ -3556,6 +3571,11 @@ TEST_F(FormatTest, FormatsNamespaces) { "struct b_struct {};\n" "} // namespace B\n", Style); + verifyFormat("template constexpr void foo requires(I == 42) {}\n" + "namespace ns {\n" + "void foo() {}\n" + "} // namespace ns\n", + Style); } TEST_F(FormatTest, NamespaceMacros) { diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h index d6209a3ea2b2..bc0652c99a14 100644 --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -54,6 +54,10 @@ dfsan_origin dfsan_get_origin(long data); /// Retrieves the label associated with the data at the given address. dfsan_label dfsan_read_label(const void *addr, size_t size); +/// Return the origin associated with the first taint byte in the size bytes +/// from the address addr. +dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size); + /// Returns whether the given label label contains the label elem. int dfsan_has_label(dfsan_label label, dfsan_label elem); @@ -87,6 +91,9 @@ void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, /// prints description at the beginning of the trace. If origin tracking is not /// on, or the address is not labeled, it prints nothing. void dfsan_print_origin_trace(const void *addr, const char *description); +/// As above, but use an origin id from dfsan_get_origin() instead of address. +/// Does not include header line with taint label and address information. +void dfsan_print_origin_id_trace(dfsan_origin origin); /// Prints the origin trace of the label at the address \p addr to a /// pre-allocated output buffer. If origin tracking is not on, or the address is @@ -124,6 +131,10 @@ void dfsan_print_origin_trace(const void *addr, const char *description); /// return value is not less than \p out_buf_size. size_t dfsan_sprint_origin_trace(const void *addr, const char *description, char *out_buf, size_t out_buf_size); +/// As above, but use an origin id from dfsan_get_origin() instead of address. +/// Does not include header line with taint label and address information. +size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf, + size_t out_buf_size); /// Prints the stack trace leading to this call to a pre-allocated output /// buffer. 
diff --git a/compiler-rt/lib/asan/asan_interface.inc b/compiler-rt/lib/asan/asan_interface.inc index 54ddfe5a26f0..89ef552b7117 100644 --- a/compiler-rt/lib/asan/asan_interface.inc +++ b/compiler-rt/lib/asan/asan_interface.inc @@ -180,36 +180,3 @@ INTERFACE_FUNCTION(__asan_update_allocation_context) INTERFACE_WEAK_FUNCTION(__asan_default_options) INTERFACE_WEAK_FUNCTION(__asan_default_suppressions) INTERFACE_WEAK_FUNCTION(__asan_on_error) - -#if defined(__x86_64__) && !defined(__APPLE__) && !defined(SANITIZER_WINDOWS) - -# define ASAN_MEMORY_ACCESS_CALLBACK_ADD(s, reg, op) \ - INTERFACE_FUNCTION(__asan_check_##op##_add_##s##_##reg) - -# define ASAN_MEMORY_ACCESS_CALLBACKS_ADD(reg) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(1, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(1, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(2, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(2, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(4, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(4, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(8, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(8, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(16, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(16, reg, store) - -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RAX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RBX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RCX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RDX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RSI) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RDI) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RBP) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R8) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R9) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R12) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R13) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R14) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R15) - -#endif // defined(__x86_64__) diff --git a/compiler-rt/lib/asan/asan_rtl_x86_64.S b/compiler-rt/lib/asan/asan_rtl_x86_64.S index 9dcb8627ba28..d27db745ed67 100644 --- a/compiler-rt/lib/asan/asan_rtl_x86_64.S +++ b/compiler-rt/lib/asan/asan_rtl_x86_64.S @@ -16,6 +16,7 @@ #define BEGINF(reg, op, s, i) \ .globl FNAME(reg, op, s, i) ;\ +.hidden FNAME(reg, op, s, i) ;\ ASM_TYPE_FUNCTION(FNAME(reg, op, s, i)) ;\ .cfi_startproc ;\ FNAME(reg, op, s, i): ;\ diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index ce2c04df83a8..ee7221c7b9a8 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -630,22 +630,16 @@ void PrintInvalidOriginWarning(dfsan_label label, const void *address) { d.Warning(), label, address, d.Default()); } -bool PrintOriginTraceToStr(const void *addr, const char *description, - InternalScopedString *out) { - CHECK(out); - CHECK(dfsan_get_track_origins()); +void PrintInvalidOriginIdWarning(dfsan_origin origin) { Decorator d; + Printf( + " %sOrigin Id %d has invalid origin tracking. This can " + "be a DFSan bug.%s\n", + d.Warning(), origin, d.Default()); +} - const dfsan_label label = *__dfsan::shadow_for(addr); - CHECK(label); - - const dfsan_origin origin = *__dfsan::origin_for(addr); - - out->append(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n", - d.Origin(), label, addr, description ? 
description : "", - d.Default()); - - Origin o = Origin::FromRawId(origin); +bool PrintOriginTraceFramesToStr(Origin o, InternalScopedString *out) { + Decorator d; bool found = false; while (o.isChainedOrigin()) { @@ -668,6 +662,25 @@ bool PrintOriginTraceToStr(const void *addr, const char *description, return found; } +bool PrintOriginTraceToStr(const void *addr, const char *description, + InternalScopedString *out) { + CHECK(out); + CHECK(dfsan_get_track_origins()); + Decorator d; + + const dfsan_label label = *__dfsan::shadow_for(addr); + CHECK(label); + + const dfsan_origin origin = *__dfsan::origin_for(addr); + + out->append(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n", + d.Origin(), label, addr, description ? description : "", + d.Default()); + + Origin o = Origin::FromRawId(origin); + return PrintOriginTraceFramesToStr(o, out); +} + } // namespace extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace( @@ -725,6 +738,50 @@ dfsan_sprint_origin_trace(const void *addr, const char *description, return trace.length(); } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_id_trace( + dfsan_origin origin) { + if (!dfsan_get_track_origins()) { + PrintNoOriginTrackingWarning(); + return; + } + Origin o = Origin::FromRawId(origin); + + InternalScopedString trace; + bool success = PrintOriginTraceFramesToStr(o, &trace); + + if (trace.length()) + Printf("%s", trace.data()); + + if (!success) + PrintInvalidOriginIdWarning(origin); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr dfsan_sprint_origin_id_trace( + dfsan_origin origin, char *out_buf, uptr out_buf_size) { + CHECK(out_buf); + + if (!dfsan_get_track_origins()) { + PrintNoOriginTrackingWarning(); + return 0; + } + Origin o = Origin::FromRawId(origin); + + InternalScopedString trace; + bool success = PrintOriginTraceFramesToStr(o, &trace); + + if (!success) { + PrintInvalidOriginIdWarning(origin); + return 0; + } + + if (out_buf_size) { + internal_strncpy(out_buf, trace.data(), out_buf_size - 1); + out_buf[out_buf_size - 1] = '\0'; + } + + return trace.length(); +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin dfsan_get_init_origin(const void *addr) { if (!dfsan_get_track_origins()) diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt index eef7c48948cc..fc2dd02ccf5f 100644 --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -30,12 +30,18 @@ fun:dfsan_flush=uninstrumented fun:dfsan_flush=discard fun:dfsan_print_origin_trace=uninstrumented fun:dfsan_print_origin_trace=discard +fun:dfsan_print_origin_id_trace=uninstrumented +fun:dfsan_print_origin_id_trace=discard fun:dfsan_sprint_origin_trace=uninstrumented fun:dfsan_sprint_origin_trace=discard +fun:dfsan_sprint_origin_id_trace=uninstrumented +fun:dfsan_sprint_origin_id_trace=discard fun:dfsan_sprint_stack_trace=uninstrumented fun:dfsan_sprint_stack_trace=discard fun:dfsan_get_origin=uninstrumented fun:dfsan_get_origin=custom +fun:dfsan_read_origin_of_first_taint=uninstrumented +fun:dfsan_read_origin_of_first_taint=discard fun:dfsan_get_init_origin=uninstrumented fun:dfsan_get_init_origin=discard fun:dfsan_get_track_origins=uninstrumented diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 085e9040221e..00b736e93ade 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -1560,6 +1560,7 @@ TEST(MemorySanitizer, memccpy_nomatch_positive) { char* y = new char[5]; 
strcpy(x, "abc"); EXPECT_UMR(memccpy(y, x, 'd', 5)); + break_optimization(y); delete[] x; delete[] y; } @@ -1570,6 +1571,7 @@ TEST(MemorySanitizer, memccpy_match_positive) { x[0] = 'a'; x[2] = 'b'; EXPECT_UMR(memccpy(y, x, 'b', 5)); + break_optimization(y); delete[] x; delete[] y; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index d219734fa0a3..b0ab08dff1db 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -7857,12 +7857,12 @@ INTERCEPTOR(void, setbuf, __sanitizer_FILE *stream, char *buf) { unpoison_file(stream); } -INTERCEPTOR(void, setbuffer, __sanitizer_FILE *stream, char *buf, int mode) { +INTERCEPTOR(void, setbuffer, __sanitizer_FILE *stream, char *buf, SIZE_T size) { void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, setbuffer, stream, buf, mode); - REAL(setbuffer)(stream, buf, mode); + COMMON_INTERCEPTOR_ENTER(ctx, setbuffer, stream, buf, size); + REAL(setbuffer)(stream, buf, size); if (buf) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer_bufsiz); + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); } if (stream) unpoison_file(stream); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc index 38f9531148d4..a5259be9335a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc @@ -11,3 +11,5 @@ INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_code) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_data) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_demangle) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_flush) +INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_demangle) +INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_inline_frames) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index 8295537d6f7a..0ca91aff8dd4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -179,6 +179,7 @@ COMMON_FLAG(bool, use_madv_dontdump, true, "in core file.") COMMON_FLAG(bool, symbolize_inline_frames, true, "Print inlined frames in stacktraces. Defaults to true.") +COMMON_FLAG(bool, demangle, true, "Print demangled symbols.") COMMON_FLAG(bool, symbolize_vs_style, false, "Print file locations in Visual Studio style (e.g: " " file(10,42): ...") diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h index 1eda31d0a817..df122ed3425c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h @@ -86,7 +86,7 @@ class SymbolizerProcess { ~SymbolizerProcess() {} /// The maximum number of arguments required to invoke a tool process. - static const unsigned kArgVMax = 6; + static const unsigned kArgVMax = 16; // Customizable by subclasses. 
virtual bool StartSymbolizerSubprocess(); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 03f5cfbb7eac..8bbd4af0c7c2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -274,14 +274,17 @@ class LLVMSymbolizerProcess final : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=unknown"; #endif - const char *const inline_flag = common_flags()->symbolize_inline_frames - ? "--inlines" - : "--no-inlines"; + const char *const demangle_flag = + common_flags()->demangle ? "--demangle" : "--no-demangle"; + const char *const inline_flag = + common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines"; int i = 0; argv[i++] = path_to_binary; + argv[i++] = demangle_flag; argv[i++] = inline_flag; argv[i++] = kSymbolizerArch; argv[i++] = nullptr; + CHECK_LE(i, kArgVMax); } }; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp index a916a03234fe..ac811c8a9136 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp @@ -91,6 +91,7 @@ class AtosSymbolizerProcess final : public SymbolizerProcess { argv[i++] = "-d"; } argv[i++] = nullptr; + CHECK_LE(i, kArgVMax); } char pid_str_[16]; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index b128ffc311a2..5f6e4cc3180e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -213,9 +213,14 @@ class Addr2LineProcess final : public SymbolizerProcess { const char *(&argv)[kArgVMax]) const override { int i = 0; argv[i++] = path_to_binary; - argv[i++] = "-iCfe"; + if (common_flags()->demangle) + argv[i++] = "-C"; + if (common_flags()->symbolize_inline_frames) + argv[i++] = "-i"; + argv[i++] = "-fe"; argv[i++] = module_name_; argv[i++] = nullptr; + CHECK_LE(i, kArgVMax); } bool ReachedEndOfOutput(const char *buffer, uptr length) const override; @@ -312,37 +317,42 @@ class Addr2LinePool final : public SymbolizerTool { FIRST_32_SECOND_64(UINT32_MAX, UINT64_MAX); }; -#if SANITIZER_SUPPORTS_WEAK_HOOKS +# if SANITIZER_SUPPORTS_WEAK_HOOKS extern "C" { SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, - char *Buffer, int MaxLength, - bool SymbolizeInlineFrames); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, - char *Buffer, int MaxLength); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -void __sanitizer_symbolize_flush(); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, - int MaxLength); + char *Buffer, int MaxLength); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool +__sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, + char *Buffer, int MaxLength); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_symbolize_flush(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int +__sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength); 
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool +__sanitizer_symbolize_set_demangle(bool Demangle); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool +__sanitizer_symbolize_set_inline_frames(bool InlineFrames); } // extern "C" class InternalSymbolizer final : public SymbolizerTool { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { - if (__sanitizer_symbolize_code != 0 && - __sanitizer_symbolize_data != 0) { - return new(*alloc) InternalSymbolizer(); - } + if (__sanitizer_symbolize_set_demangle) + CHECK(__sanitizer_symbolize_set_demangle(common_flags()->demangle)); + if (__sanitizer_symbolize_set_inline_frames) + CHECK(__sanitizer_symbolize_set_inline_frames( + common_flags()->symbolize_inline_frames)); + if (__sanitizer_symbolize_code && __sanitizer_symbolize_data) + return new (*alloc) InternalSymbolizer(); return 0; } bool SymbolizePC(uptr addr, SymbolizedStack *stack) override { bool result = __sanitizer_symbolize_code( - stack->info.module, stack->info.module_offset, buffer_, kBufferSize, - common_flags()->symbolize_inline_frames); - if (result) ParseSymbolizePCOutput(buffer_, stack); + stack->info.module, stack->info.module_offset, buffer_, kBufferSize); + if (result) + ParseSymbolizePCOutput(buffer_, stack); return result; } @@ -365,7 +375,7 @@ class InternalSymbolizer final : public SymbolizerTool { if (__sanitizer_symbolize_demangle) { for (uptr res_length = 1024; res_length <= InternalSizeClassMap::kMaxSize;) { - char *res_buff = static_cast(InternalAlloc(res_length)); + char *res_buff = static_cast(InternalAlloc(res_length)); uptr req_length = __sanitizer_symbolize_demangle(name, res_buff, res_length); if (req_length > res_length) { @@ -380,19 +390,19 @@ class InternalSymbolizer final : public SymbolizerTool { } private: - InternalSymbolizer() { } + InternalSymbolizer() {} static const int kBufferSize = 16 * 1024; char buffer_[kBufferSize]; }; -#else // SANITIZER_SUPPORTS_WEAK_HOOKS +# else // SANITIZER_SUPPORTS_WEAK_HOOKS class InternalSymbolizer final : public SymbolizerTool { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; } }; -#endif // SANITIZER_SUPPORTS_WEAK_HOOKS +# endif // SANITIZER_SUPPORTS_WEAK_HOOKS const char *Symbolizer::PlatformDemangle(const char *name) { return DemangleSwiftAndCXX(name); diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp index 3809880d50b4..80cab36426c5 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp +++ b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp @@ -17,10 +17,17 @@ #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +static llvm::symbolize::LLVMSymbolizer *Symbolizer = nullptr; +static bool Demangle = true; +static bool InlineFrames = true; + static llvm::symbolize::LLVMSymbolizer *getDefaultSymbolizer() { - static llvm::symbolize::LLVMSymbolizer *DefaultSymbolizer = - new llvm::symbolize::LLVMSymbolizer(); - return DefaultSymbolizer; + if (Symbolizer) + return Symbolizer; + llvm::symbolize::LLVMSymbolizer::Options Opts; + Opts.Demangle = Demangle; + Symbolizer = new llvm::symbolize::LLVMSymbolizer(Opts); + return Symbolizer; } static llvm::symbolize::PrinterConfig getDefaultPrinterConfig() { @@ -43,8 +50,7 @@ extern "C" { typedef uint64_t u64; bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset, - char *Buffer, int MaxLength, - bool 
SymbolizeInlineFrames) { + char *Buffer, int MaxLength) { std::string Result; { llvm::raw_string_ostream OS(Result); @@ -55,7 +61,7 @@ bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset, // TODO: it is neccessary to set proper SectionIndex here. // object::SectionedAddress::UndefSection works for only absolute addresses. - if (SymbolizeInlineFrames) { + if (InlineFrames) { auto ResOrErr = getDefaultSymbolizer()->symbolizeInlinedCode( ModuleName, {ModuleOffset, llvm::object::SectionedAddress::UndefSection}); @@ -93,7 +99,10 @@ bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset, Result.c_str()) < MaxLength; } -void __sanitizer_symbolize_flush() { getDefaultSymbolizer()->flush(); } +void __sanitizer_symbolize_flush() { + if (Symbolizer) + Symbolizer->flush(); +} int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength) { @@ -105,6 +114,19 @@ int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, : 0; } +bool __sanitizer_symbolize_set_demangle(bool Value) { + // Must be called before LLVMSymbolizer created. + if (Symbolizer) + return false; + Demangle = Value; + return true; +} + +bool __sanitizer_symbolize_set_inline_frames(bool Value) { + InlineFrames = Value; + return true; +} + // Override __cxa_atexit and ignore callbacks. // This prevents crashes in a configuration when the symbolizer // is built into sanitizer runtime and consequently into the test process. diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh index 56a0d99d59c8..599f063b45c9 100755 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh @@ -155,7 +155,12 @@ SYMBOLIZER_FLAGS="$LLVM_FLAGS -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -std $CXX $SYMBOLIZER_FLAGS ${SRC_DIR}/sanitizer_symbolize.cpp ${SRC_DIR}/sanitizer_wrappers.cpp -c $AR rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o -SYMBOLIZER_API_LIST=__sanitizer_symbolize_code,__sanitizer_symbolize_data,__sanitizer_symbolize_flush,__sanitizer_symbolize_demangle +SYMBOLIZER_API_LIST=__sanitizer_symbolize_code +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_data +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_flush +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_demangle +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_set_demangle +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_set_inline_frames LIBCXX_ARCHIVE_DIR=$(dirname $(find $LIBCXX_BUILD -name libc++.a | head -n1)) diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index cb4494f10b85..0bb38ba951a8 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -38,6 +38,8 @@ __sanitizer_symbolize_code T __sanitizer_symbolize_data T __sanitizer_symbolize_demangle T __sanitizer_symbolize_flush T +__sanitizer_symbolize_set_demangle T +__sanitizer_symbolize_set_inline_frames T __strdup U __udivdi3 U __umoddi3 U diff --git a/compiler-rt/lib/sanitizer_common/weak_symbols.txt b/compiler-rt/lib/sanitizer_common/weak_symbols.txt index 5a2b275184f4..d07f81bc8c12 100644 --- a/compiler-rt/lib/sanitizer_common/weak_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/weak_symbols.txt @@ -6,3 +6,5 @@ ___sanitizer_symbolize_code 
___sanitizer_symbolize_data ___sanitizer_symbolize_demangle ___sanitizer_symbolize_flush +___sanitizer_symbolize_set_demangle +___sanitizer_symbolize_set_inline_frames diff --git a/compiler-rt/test/dfsan/origin_id_stack_trace.c b/compiler-rt/test/dfsan/origin_id_stack_trace.c new file mode 100644 index 000000000000..5958a8a3bea2 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_id_stack_trace.c @@ -0,0 +1,74 @@ +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out +// +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 -mllvm -dfsan-instrument-with-call-threshold=0 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out +// +// REQUIRES: x86_64-target-arch + +#include +#include +#include + +__attribute__((noinline)) int foo(int a, int b) { return a + b; } + +__attribute__((noinline)) void bar(int depth, void *addr, int size) { + if (depth) { + bar(depth - 1, addr, size); + } else { + dfsan_set_label(1, addr, size); + } +} + +__attribute__((noinline)) void baz(int depth, void *addr, int size) { + bar(depth, addr, size); +} + +int main(int argc, char *argv[]) { + int a = 10; + int b = 20; + baz(8, &a, sizeof(a)); + int c = foo(a, b); + dfsan_origin c_o = dfsan_get_origin(c); + dfsan_print_origin_id_trace(c_o); + // CHECK: Origin value: {{.*}}, Taint value was created at + // CHECK: #0 {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-16]] + // CHECK-COUNT-8: #{{[0-9]+}} {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-19]] + // CHECK: #9 {{.*}} in baz.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-13]] + + char buf[3000]; + size_t length = dfsan_sprint_origin_id_trace(c_o, buf, sizeof(buf)); + + printf("==OUTPUT==\n\n%s==EOS==\n", buf); + // CHECK: ==OUTPUT== + // CHECK: Origin value: {{.*}}, Taint value was created at + // CHECK: #0 {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-26]] + // CHECK-COUNT-8: #{{[0-9]+}} {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-29]] + // CHECK: #9 {{.*}} in baz.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-23]] + // CHECK: ==EOS== + + char tinybuf[20]; + size_t same_length = + dfsan_sprint_origin_id_trace(c_o, tinybuf, sizeof(tinybuf)); + + printf("==TRUNCATED OUTPUT==\n\n%s==EOS==\n", tinybuf); + // CHECK: ==TRUNCATED OUTPUT== + // CHECK: Origin value: 0x1==EOS== + + printf("Returned length: %zu\n", length); + printf("Actual length: %zu\n", strlen(buf)); + printf("Returned length with truncation: %zu\n", same_length); + + // CHECK: Returned length: [[#LEN:]] + // CHECK: Actual length: [[#LEN]] + // CHECK: Returned length with truncation: [[#LEN]] + + buf[0] = '\0'; + length = dfsan_sprint_origin_id_trace(c_o, buf, 0); + printf("Output=\"%s\"\n", buf); + printf("Returned length: %zu\n", length); + // CHECK: Output="" + // CHECK: Returned length: [[#LEN]] +} diff --git a/compiler-rt/test/dfsan/origin_of_first_taint.c b/compiler-rt/test/dfsan/origin_of_first_taint.c new file mode 100644 index 000000000000..45cd6146f031 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_of_first_taint.c @@ -0,0 +1,34 @@ +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 %s -o %t && \ +// RUN: %run %t 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +#include +#include +#include + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_set_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + + dfsan_origin c_orig = 
dfsan_get_origin(c); + fprintf(stderr, "c_orig 0x%x\n", c_orig); + // CHECK: c_orig 0x[[#%x,C_ORIG:]] + assert(c_orig != 0); + dfsan_print_origin_id_trace(c_orig); + // CHECK: Origin value: 0x[[#%x,C_ORIG]], Taint value was created at + + uint64_t d[4] = {1, 2, 3, c}; + dfsan_origin d_orig = dfsan_read_origin_of_first_taint(d, sizeof(d)); + fprintf(stderr, "d_orig 0x%x\n", d_orig); + // CHECK: d_orig 0x[[#%x,D_ORIG:]] + assert(d_orig != 0); + dfsan_print_origin_id_trace(d_orig); + // CHECK: Origin value: 0x[[#%x,D_ORIG]], Taint value was stored to memory at + // CHECK: Origin value: 0x[[#%x,C_ORIG]], Taint value was created at + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp index bc29ba45d89a..f789bfd727d5 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp @@ -34,7 +34,9 @@ void test_setbuffer() { print_something(); - setbuffer(stdout, buf, BUFSIZ); + // Ensure that interceptor reads correct size + // (not BUFSIZ as by default, hence BUFSIZ/2). + setbuffer(stdout, buf, BUFSIZ / 2); print_something(); diff --git a/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp new file mode 100644 index 000000000000..62ac5d0b2337 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp @@ -0,0 +1,30 @@ +// RUN: %clangxx -O1 -fno-omit-frame-pointer %s -o %t +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/ %run %t 2>&1 | FileCheck %s +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:demangle=0 %run %t 2>&1 | FileCheck %s --check-prefixes=NODEMANGLE +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:demangle=1 %run %t 2>&1 | FileCheck %s + +// XFAIL: darwin + +#include +#include +#include + +char buffer[10000]; + +__attribute__((noinline)) static void Symbolize() { + __sanitizer_symbolize_pc(__builtin_return_address(0), "%p %F %L", buffer, + sizeof(buffer)); + for (char *p = buffer; strlen(p); p += strlen(p) + 1) + printf("%s\n", p); +} + +struct Symbolizer { + __attribute__((noinline)) ~Symbolizer() { Symbolize(); } +}; + +// NODEMANGLE: in _ZN10SymbolizerD2Ev +// CHECK: in Symbolizer::~Symbolizer +int main() { + Symbolizer(); + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp index 9567103155f0..a39f30e0633d 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -O3 %s -o %t // RUN: %env_tool_opts=strip_path_prefix=/TestCases/ %run %t 2>&1 | FileCheck %s -// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:symbolize_inline_frames=0 \ -// RUN: %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-NOINLINE +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:symbolize_inline_frames=0 %run %t 2>&1 | FileCheck %s --check-prefixes=NOINLINE +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:symbolize_inline_frames=1 %run %t 2>&1 | FileCheck %s // XFAIL: darwin @@ -18,7 +18,7 @@ __attribute__((noinline)) static void Symbolize() { printf("%s\n", p); } -// CHECK-NOINLINE: {{0x[0-9a-f]+}} in main symbolize_pc_inline.cpp:[[@LINE+2]] +// NOINLINE: {{0x[0-9a-f]+}} in main symbolize_pc_inline.cpp:[[@LINE+2]] // CHECK: [[ADDR:0x[0-9a-f]+]] in C2 
symbolize_pc_inline.cpp:[[@LINE+1]] static inline void C2() { Symbolize(); } diff --git a/compiler-rt/test/tsan/Darwin/main_tid.mm b/compiler-rt/test/tsan/Darwin/main_tid.mm index 6dea58e53326..886880edeb2f 100644 --- a/compiler-rt/test/tsan/Darwin/main_tid.mm +++ b/compiler-rt/test/tsan/Darwin/main_tid.mm @@ -13,6 +13,7 @@ int __tsan_get_report_thread(void *report, unsigned long idx, int *tid, unsigned long trace_size); } +__attribute__((disable_sanitizer_instrumentation)) void __tsan_on_report(void *report) { fprintf(stderr, "__tsan_on_report(%p)\n", report); diff --git a/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c b/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c index 9c7c47e9235b..8139605c753c 100644 --- a/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c +++ b/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c @@ -10,6 +10,8 @@ long g = 0; long h = 0; + +__attribute__((disable_sanitizer_instrumentation)) void f() { static dispatch_once_t onceToken; dispatch_once(&onceToken, ^{ @@ -18,6 +20,7 @@ void f() { h++; } +__attribute__((disable_sanitizer_instrumentation)) void __tsan_on_report() { fprintf(stderr, "Report.\n"); f(); diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index dc2f2b77356d..9666f9c0422a 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -21,6 +21,7 @@ #include "flang/ISO_Fortran_binding.h" #include "flang/Runtime/memory.h" #include "flang/Runtime/type-code.h" +#include #include #include #include diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2698cafca8eb..16efa1edf8f9 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1482,6 +1482,7 @@ CHECK_SIMPLE_CLAUSE(AppendArgs, OMPC_append_args) CHECK_SIMPLE_CLAUSE(MemoryOrder, OMPC_memory_order) CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind) CHECK_SIMPLE_CLAUSE(Align, OMPC_align) +CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp index 6a657e432c18..47e3200f9dcc 100644 --- a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp +++ b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp @@ -94,7 +94,7 @@ JsonFile parseJsonResultFile(StringRef Filename) { json::Path::Root Root; JsonFile JF; if (!fromJSON(JsonValue, JF, Root)) - ExitOnErr(Root.getError()); + ExitOnErr(Root.get_error()); return JF; } diff --git a/libc/config/linux/CMakeLists.txt b/libc/config/linux/CMakeLists.txt index e3a00189a2ad..bc42557f3db9 100644 --- a/libc/config/linux/CMakeLists.txt +++ b/libc/config/linux/CMakeLists.txt @@ -1,15 +1,3 @@ -add_gen_header( - linux_syscall_h - DEF_FILE syscall.h.def - GEN_HDR syscall.h - PARAMS - inline_syscalls=${LIBC_TARGET_ARCHITECTURE}/syscall.h.inc - DATA_FILES - ${LIBC_TARGET_ARCHITECTURE}/syscall.h.inc - DEPENDS - libc.src.__support.common -) - add_header( app_h HDR diff --git a/libc/loader/linux/x86_64/CMakeLists.txt b/libc/loader/linux/x86_64/CMakeLists.txt index ceaa94ee11a3..75b8a23802e7 100644 --- a/libc/loader/linux/x86_64/CMakeLists.txt +++ b/libc/loader/linux/x86_64/CMakeLists.txt @@ -3,9 +3,9 @@ add_loader_object( SRC start.cpp DEPENDS - libc.config.linux.linux_syscall_h libc.config.linux.app_h libc.include.sys_syscall + 
libc.src.__support.OSUtil.osutil libc.src.string.memcpy libc.src.sys.mman.mmap COMPILE_OPTIONS diff --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp index 320fda96105b..55ae00391927 100644 --- a/libc/loader/linux/x86_64/start.cpp +++ b/libc/loader/linux/x86_64/start.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "config/linux/app.h" -#include "config/linux/syscall.h" #include "include/sys/mman.h" #include "include/sys/syscall.h" +#include "src/__support/OSUtil/syscall.h" #include "src/string/memcpy.h" #include "src/sys/mman/mmap.h" diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt index b0f7ac86ffa5..028a9c06123c 100644 --- a/libc/src/__support/OSUtil/CMakeLists.txt +++ b/libc/src/__support/OSUtil/CMakeLists.txt @@ -5,6 +5,7 @@ add_header_library( HDRS io.h quick_exit.h + syscall.h DEPENDS libc.src.__support.OSUtil.linux.linux_util ) diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt index 38286147a914..4b0f5d70a0a2 100644 --- a/libc/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt @@ -1,10 +1,12 @@ +add_subdirectory(x86_64) + add_header_library( linux_util HDRS io.h quick_exit.h + syscall.h DEPENDS - libc.config.linux.linux_syscall_h - libc.include.sys_syscall - libc.src.string.string_utils + .x86_64.linux_x86_64_util + libc.src.__support.common ) diff --git a/libc/src/__support/OSUtil/linux/io.h b/libc/src/__support/OSUtil/linux/io.h index 5aaa544b5142..71830476bfff 100644 --- a/libc/src/__support/OSUtil/linux/io.h +++ b/libc/src/__support/OSUtil/linux/io.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_IO_H #define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_IO_H -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. #include "src/string/string_utils.h" +#include "syscall.h" // For internal syscall function. namespace __llvm_libc { diff --git a/libc/src/__support/OSUtil/linux/quick_exit.h b/libc/src/__support/OSUtil/linux/quick_exit.h index 932959d265b1..eb9d86f446cf 100644 --- a/libc/src/__support/OSUtil/linux/quick_exit.h +++ b/libc/src/__support/OSUtil/linux/quick_exit.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_QUICK_EXIT_H #define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_QUICK_EXIT_H -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "syscall.h" // For internal syscall function. namespace __llvm_libc { diff --git a/libc/src/__support/OSUtil/linux/syscall.h b/libc/src/__support/OSUtil/linux/syscall.h new file mode 100644 index 000000000000..754883e8a667 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/syscall.h @@ -0,0 +1,18 @@ +//===----------------------- Linux syscalls ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_SYSCALL_H +#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_SYSCALL_H + +#include "src/__support/architectures.h" + +#ifdef LLVM_LIBC_ARCH_X86_64 +#include "x86_64/syscall.h" +#endif + +#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_SYSCALL_H diff --git a/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt b/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt new file mode 100644 index 000000000000..a7f2d74e6353 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt @@ -0,0 +1,7 @@ +add_header_library( + linux_x86_64_util + HDRS + syscall.h + DEPENDS + libc.src.__support.common +) diff --git a/libc/config/linux/x86_64/syscall.h.inc b/libc/src/__support/OSUtil/linux/x86_64/syscall.h similarity index 93% rename from libc/config/linux/x86_64/syscall.h.inc rename to libc/src/__support/OSUtil/linux/x86_64/syscall.h index ee3b5e5bfc0d..77d139806271 100644 --- a/libc/config/linux/x86_64/syscall.h.inc +++ b/libc/src/__support/OSUtil/linux/x86_64/syscall.h @@ -1,4 +1,4 @@ -//===------------ inline implementation of x86_64 syscalls ----------------===// +//===---------- inline implementation of x86_64 syscalls ----------* C++ *-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -%%begin() +#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_X86_64_SYSCALL_H +#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_X86_64_SYSCALL_H #include "src/__support/common.h" @@ -100,7 +101,8 @@ __attribute__((always_inline)) inline long syscall(long __number, Ts... 
ts) { return syscall(__number, (long)ts...); } - #undef SYSCALL_CLOBBER_LIST } // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_X86_64_SYSCALL_H diff --git a/libc/config/linux/syscall.h.def b/libc/src/__support/OSUtil/syscall.h similarity index 65% rename from libc/config/linux/syscall.h.def rename to libc/src/__support/OSUtil/syscall.h index 0daceaf00dde..b1137275a86f 100644 --- a/libc/config/linux/syscall.h.def +++ b/libc/src/__support/OSUtil/syscall.h @@ -6,9 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_CONFIG_LINUX_SYSCALL_H -#define LLVM_LIBC_CONFIG_LINUX_SYSCALL_H +#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_SYSCALL_H +#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_SYSCALL_H -%%include_file(${inline_syscalls}) +#ifdef __unix__ +#include "linux/syscall.h" +#endif -#endif // LLVM_LIBC_CONFIG_LINUX_SYSCALL_H +#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_SYSCALL_H diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index a7ac3e2e9a68..8defeed122a9 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -182,6 +182,114 @@ eisel_lemire(typename fputil::FPBits::UIntType mantissa, int32_t exp10, return true; } +#if !defined(LONG_DOUBLE_IS_DOUBLE) +template <> +inline bool eisel_lemire( + typename fputil::FPBits::UIntType mantissa, int32_t exp10, + typename fputil::FPBits::UIntType *outputMantissa, + uint32_t *outputExp2) { + using BitsType = typename fputil::FPBits::UIntType; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; + + // Exp10 Range + // This doesn't reach very far into the range for long doubles, since it's + // sized for doubles and their 11 exponent bits, and not for long doubles and + // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long + // double). This is a known tradeoff, and was made because a proper long + // double table would be approximately 16 times larger. This would have + // significant memory and storage costs all the time to speed up a relatively + // uncommon path. In addition the exp10_to_exp2 function only approximates + // multiplying by log(10)/log(2), and that approximation may not be accurate + // out to the full long double range. + if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || + exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { + return false; + } + + // Normalization + uint32_t clz = leading_zeroes(mantissa); + mantissa <<= clz; + + uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + + fputil::FloatProperties::EXPONENT_BIAS - clz; + + // Multiplication + const uint64_t *power_of_ten = + DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; + + // Since the input mantissa is more than 64 bits, we have to multiply with the + // full 128 bits of the power of ten to get an approximation with the same + // number of significant bits. This means that we only get the one + // approximation, and that approximation is 256 bits long. 
+ __uint128_t approx_upper = static_cast<__uint128_t>(high64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[1]); + + __uint128_t approx_middle = static_cast<__uint128_t>(high64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[0]) + + static_cast<__uint128_t>(low64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[1]); + + __uint128_t approx_lower = static_cast<__uint128_t>(low64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[0]); + + __uint128_t final_approx_lower = + approx_lower + (static_cast<__uint128_t>(low64(approx_middle)) << 64); + __uint128_t final_approx_upper = approx_upper + high64(approx_middle) + + (final_approx_lower < approx_lower ? 1 : 0); + + // The halfway constant is used to check if the bits that will be shifted away + // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 + // (final mantissa size) - 3 (we shift away the last two bits separately for + // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, + // it's 12 bits for 128 bit floats in this case. + constexpr __uint128_t HALFWAY_CONSTANT = + (__uint128_t(1) << (BITS_IN_MANTISSA - + fputil::FloatProperties::MANTISSA_WIDTH - + 3)) - + 1; + + if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && + final_approx_lower + mantissa < mantissa) { + return false; + } + + // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats + BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1); + BitsType final_mantissa = + final_approx_upper >> + (msb + BITS_IN_MANTISSA - + (fputil::FloatProperties::MANTISSA_WIDTH + 3)); + exp2 -= 1 ^ msb; // same as !msb + + // Half-way ambiguity + if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && + (final_mantissa & 3) == 1) { + return false; + } + + // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit + // floats + final_mantissa += final_mantissa & 1; + final_mantissa >>= 1; + if ((final_mantissa >> + (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { + final_mantissa >>= 1; + ++exp2; + } + + // The if block is equivalent to (but has fewer branches than): + // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } + if (exp2 - 1 >= + (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { + return false; + } + + *outputMantissa = final_mantissa; + *outputExp2 = exp2; + return true; +} +#endif + // The nth item in POWERS_OF_TWO represents the greatest power of two less than // 10^n. This tells us how much we can safely shift without overshooting. constexpr uint8_t POWERS_OF_TWO[19] = { diff --git a/libc/src/assert/CMakeLists.txt b/libc/src/assert/CMakeLists.txt index 36123cc6bd41..7f82017e51c5 100644 --- a/libc/src/assert/CMakeLists.txt +++ b/libc/src/assert/CMakeLists.txt @@ -8,7 +8,7 @@ add_entrypoint_object( DEPENDS # These two dependencies are temporary and should be replaced by fprintf # later. - libc.config.linux.linux_syscall_h + libc.src.__support.OSUtil.osutil libc.include.sys_syscall libc.src.stdlib.abort ) diff --git a/libc/src/assert/__assert_fail.cpp b/libc/src/assert/__assert_fail.cpp index 0fe2ca6da87c..93c176a62231 100644 --- a/libc/src/assert/__assert_fail.cpp +++ b/libc/src/assert/__assert_fail.cpp @@ -10,8 +10,8 @@ #include "src/stdlib/abort.h" // These includes are temporary. -#include "config/linux/syscall.h" // For internal syscall function. #include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
namespace __llvm_libc { diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp index 523045ffa684..e85c030a02ec 100644 --- a/libc/src/math/generic/logf.cpp +++ b/libc/src/math/generic/logf.cpp @@ -167,20 +167,20 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) { m += xbits.get_exponent(); // Set bits to 1.m xbits.set_unbiased_exponent(0x7F); - int fIndex = xbits.get_mantissa() >> 16; + int f_index = xbits.get_mantissa() >> 16; FPBits f(xbits.val); f.bits &= ~0x0000'FFFF; double d = static_cast(xbits) - static_cast(f); - d *= ONE_OVER_F[fIndex]; + d *= ONE_OVER_F[f_index]; double r = __llvm_libc::fputil::polyeval( d, 0x1.0000000008169p+0, -0x1.0000004f78405p-1, 0x1.555654d2bc769p-2, -0x1.00a570d090322p-2, 0x1.e158d823f89cap-3); double extra_factor = - __llvm_libc::fputil::fma(static_cast(m), LOG_2, LOG_F[fIndex]); + __llvm_libc::fputil::fma(static_cast(m), LOG_2, LOG_F[f_index]); switch (FPBits(x).uintval()) { case 0x3f80d19f: return 0x1.a1e82cp-8f; diff --git a/libc/src/signal/linux/CMakeLists.txt b/libc/src/signal/linux/CMakeLists.txt index fc4a7aa32bf2..419146f5bd44 100644 --- a/libc/src/signal/linux/CMakeLists.txt +++ b/libc/src/signal/linux/CMakeLists.txt @@ -7,9 +7,9 @@ add_entrypoint_object( signal.h ../raise.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.signal libc.include.sys_syscall + libc.src.__support.OSUtil.osutil ) add_object_library( @@ -25,8 +25,8 @@ add_object_library( # asan creates asan.module_ctor which uses stack space, causing warnings. -fno-sanitize=address DEPENDS - libc.config.linux.linux_syscall_h libc.include.sys_syscall + libc.src.__support.OSUtil.osutil ) add_entrypoint_object( @@ -38,9 +38,9 @@ add_entrypoint_object( ../sigaction.h DEPENDS .__restore - libc.config.linux.linux_syscall_h libc.include.signal libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) @@ -52,9 +52,9 @@ add_entrypoint_object( signal.h ../sigprocmask.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.signal libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) diff --git a/libc/src/signal/linux/__restore.cpp b/libc/src/signal/linux/__restore.cpp index 8ce848f4960b..c648ddb4cb15 100644 --- a/libc/src/signal/linux/__restore.cpp +++ b/libc/src/signal/linux/__restore.cpp @@ -10,8 +10,8 @@ // strongly control the options this file is compiled with. __restore_rt cannot // make any stack allocations so we must ensure this. -#include "config/linux/syscall.h" #include "include/sys/syscall.h" +#include "src/__support/OSUtil/syscall.h" namespace __llvm_libc { diff --git a/libc/src/signal/linux/signal.h b/libc/src/signal/linux/signal.h index a59a56ced5d6..fd312d79ed72 100644 --- a/libc/src/signal/linux/signal.h +++ b/libc/src/signal/linux/signal.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_SIGNAL_LINUX_SIGNAL_H #define LLVM_LIBC_SRC_SIGNAL_LINUX_SIGNAL_H -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "include/signal.h" diff --git a/libc/src/stdlib/linux/CMakeLists.txt b/libc/src/stdlib/linux/CMakeLists.txt index 27e00a35c70f..c5130c11ed91 100644 --- a/libc/src/stdlib/linux/CMakeLists.txt +++ b/libc/src/stdlib/linux/CMakeLists.txt @@ -6,6 +6,6 @@ add_entrypoint_object( ../_Exit.h DEPENDS libc.include.sys_syscall - libc.config.linux.linux_syscall_h libc.include.stdlib + libc.src.__support.OSUtil.osutil ) diff --git a/libc/src/stdlib/linux/_Exit.cpp b/libc/src/stdlib/linux/_Exit.cpp index 7fdd60ffe9bc..51b0a48c7521 100644 --- a/libc/src/stdlib/linux/_Exit.cpp +++ b/libc/src/stdlib/linux/_Exit.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/stdlib/_Exit.h" diff --git a/libc/src/sys/mman/linux/CMakeLists.txt b/libc/src/sys/mman/linux/CMakeLists.txt index 7bf7c2f96d2d..949d965fdbe9 100644 --- a/libc/src/sys/mman/linux/CMakeLists.txt +++ b/libc/src/sys/mman/linux/CMakeLists.txt @@ -5,9 +5,9 @@ add_entrypoint_object( HDRS ../mmap.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.sys_mman libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) @@ -18,8 +18,8 @@ add_entrypoint_object( HDRS ../munmap.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.sys_mman libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) diff --git a/libc/src/sys/mman/linux/mmap.cpp b/libc/src/sys/mman/linux/mmap.cpp index 31e114a7f66b..0447ef129891 100644 --- a/libc/src/sys/mman/linux/mmap.cpp +++ b/libc/src/sys/mman/linux/mmap.cpp @@ -8,8 +8,8 @@ #include "src/sys/mman/mmap.h" -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" diff --git a/libc/src/sys/mman/linux/munmap.cpp b/libc/src/sys/mman/linux/munmap.cpp index fd89773cc829..7801a637f23f 100644 --- a/libc/src/sys/mman/linux/munmap.cpp +++ b/libc/src/sys/mman/linux/munmap.cpp @@ -8,8 +8,8 @@ #include "src/sys/mman/munmap.h" -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" diff --git a/libc/src/threads/linux/CMakeLists.txt b/libc/src/threads/linux/CMakeLists.txt index e9fc8d2d9c44..fcc3381e68bb 100644 --- a/libc/src/threads/linux/CMakeLists.txt +++ b/libc/src/threads/linux/CMakeLists.txt @@ -16,9 +16,9 @@ add_entrypoint_object( ../call_once.h DEPENDS .threads_utils - libc.config.linux.linux_syscall_h libc.include.sys_syscall libc.include.threads + libc.src.__support.OSUtil.osutil ) add_header_library( @@ -30,9 +30,9 @@ add_header_library( Thread.h DEPENDS .thread_start_args_h - libc.config.linux.linux_syscall_h libc.include.sys_syscall libc.include.threads + libc.src.__support.OSUtil.osutil ) add_entrypoint_object( @@ -43,11 +43,11 @@ add_entrypoint_object( ../thrd_create.h DEPENDS .threads_utils - libc.config.linux.linux_syscall_h libc.include.errno libc.include.sys_syscall libc.include.threads libc.src.__support.common + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location libc.src.sys.mman.mmap COMPILE_OPTIONS @@ -64,11 +64,11 @@ add_entrypoint_object( ../thrd_join.h DEPENDS .threads_utils - libc.config.linux.linux_syscall_h libc.include.sys_syscall libc.include.threads - libc.src.sys.mman.munmap libc.src.__support.common + libc.src.__support.OSUtil.osutil + libc.src.sys.mman.munmap ) add_entrypoint_object( diff --git a/libc/src/threads/linux/CndVar.h b/libc/src/threads/linux/CndVar.h index 5d9ced5d6457..7e531c87ff1f 100644 --- a/libc/src/threads/linux/CndVar.h +++ b/libc/src/threads/linux/CndVar.h @@ -12,9 +12,9 @@ #include "Futex.h" #include "Mutex.h" -#include "config/linux/syscall.h" // For syscall functions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For values like thrd_success etc. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For values like thrd_success etc. +#include "src/__support/OSUtil/syscall.h" // For syscall functions. #include // For futex operations. #include // For atomic operations diff --git a/libc/src/threads/linux/Mutex.h b/libc/src/threads/linux/Mutex.h index f6033c92eaf6..a7b06d4c2bab 100644 --- a/libc/src/threads/linux/Mutex.h +++ b/libc/src/threads/linux/Mutex.h @@ -11,9 +11,9 @@ #include "Futex.h" -#include "config/linux/syscall.h" // For syscall functions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For values like thrd_success etc. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For values like thrd_success etc. +#include "src/__support/OSUtil/syscall.h" // For syscall functions. #include // For futex operations. #include diff --git a/libc/src/threads/linux/call_once.cpp b/libc/src/threads/linux/call_once.cpp index 1bcb46f6b0a3..9b9664634a1f 100644 --- a/libc/src/threads/linux/call_once.cpp +++ b/libc/src/threads/linux/call_once.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "src/threads/call_once.h" -#include "config/linux/syscall.h" // For syscall functions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For call_once related type definition. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For call_once related type definition. +#include "src/__support/OSUtil/syscall.h" // For syscall functions. 
#include "src/__support/common.h" #include "src/threads/linux/Futex.h" diff --git a/libc/src/threads/linux/thrd_create.cpp b/libc/src/threads/linux/thrd_create.cpp index f2464d4075ea..5421c96775b7 100644 --- a/libc/src/threads/linux/thrd_create.cpp +++ b/libc/src/threads/linux/thrd_create.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "src/threads/thrd_create.h" -#include "config/linux/syscall.h" // For syscall function. -#include "include/errno.h" // For E* error values. -#include "include/sys/mman.h" // For PROT_* and MAP_* definitions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For thrd_* type definitions. +#include "include/errno.h" // For E* error values. +#include "include/sys/mman.h" // For PROT_* and MAP_* definitions. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For thrd_* type definitions. +#include "src/__support/OSUtil/syscall.h" // For syscall function. #include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" #include "src/sys/mman/mmap.h" diff --git a/libc/src/threads/linux/thrd_join.cpp b/libc/src/threads/linux/thrd_join.cpp index 3c779495def5..f55f5a3af3f5 100644 --- a/libc/src/threads/linux/thrd_join.cpp +++ b/libc/src/threads/linux/thrd_join.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "src/threads/thrd_join.h" -#include "config/linux/syscall.h" // For syscall function. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For thrd_* type definitions. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For thrd_* type definitions. +#include "src/__support/OSUtil/syscall.h" // For syscall function. #include "src/__support/common.h" #include "src/sys/mman/munmap.h" #include "src/threads/linux/Futex.h" diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index bd2cba708e69..1c3fdf15a6a7 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -6,7 +6,7 @@ add_entrypoint_object( ../write.h DEPENDS libc.include.unistd - libc.config.linux.linux_syscall_h libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) diff --git a/libc/src/unistd/linux/write.cpp b/libc/src/unistd/linux/write.cpp index de0efb9bf8de..3554e3030943 100644 --- a/libc/src/unistd/linux/write.cpp +++ b/libc/src/unistd/linux/write.cpp @@ -8,8 +8,8 @@ #include "src/unistd/write.h" -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" diff --git a/libc/test/CMakeLists.txt b/libc/test/CMakeLists.txt index 3cf1f8d59d95..850d69345d29 100644 --- a/libc/test/CMakeLists.txt +++ b/libc/test/CMakeLists.txt @@ -18,6 +18,5 @@ if(NOT LLVM_LIBC_FULL_BUILD) return() endif() -add_subdirectory(config) add_subdirectory(integration) add_subdirectory(loader) diff --git a/libc/test/config/CMakeLists.txt b/libc/test/config/CMakeLists.txt deleted file mode 100644 index 52824d270583..000000000000 --- a/libc/test/config/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(${LIBC_TARGET_OS}) diff --git a/libc/test/config/linux/x86_64/CMakeLists.txt b/libc/test/config/linux/x86_64/CMakeLists.txt deleted file mode 100644 index 0436736eefd0..000000000000 --- a/libc/test/config/linux/x86_64/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_libc_unittest( - libc_linux_x86_64_syscall_unittest - SUITE libc_linux_tests - SRCS syscall_test.cpp - DEPENDS - libc.config.linux.linux_syscall_h -) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index d2367ef52f69..20d0e2d75745 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -48,3 +48,5 @@ add_custom_command(TARGET libc_str_to_float_comparison_test DEPENDS ${float_test_file} COMMENT "Test the strtof and strtod implementations against precomputed results." VERBATIM) + +add_subdirectory(OSUtil) diff --git a/libc/test/src/__support/OSUtil/CMakeLists.txt b/libc/test/src/__support/OSUtil/CMakeLists.txt new file mode 100644 index 000000000000..c70fedf3ff04 --- /dev/null +++ b/libc/test/src/__support/OSUtil/CMakeLists.txt @@ -0,0 +1,5 @@ +add_libc_testsuite(libc_osutil_tests) + +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${LIBC_TARGET_OS}) +endif() diff --git a/libc/test/config/linux/CMakeLists.txt b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt similarity index 76% rename from libc/test/config/linux/CMakeLists.txt rename to libc/test/src/__support/OSUtil/linux/CMakeLists.txt index b449a33ec94c..bfb072c03e97 100644 --- a/libc/test/config/linux/CMakeLists.txt +++ b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt @@ -1,5 +1,3 @@ -add_libc_testsuite(libc_linux_tests) - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) endif() diff --git a/libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt b/libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt new file mode 100644 index 000000000000..b6f57eb41f72 --- /dev/null +++ b/libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt @@ -0,0 +1,7 @@ +add_libc_unittest( + syscall_unittest + SUITE libc_osutil_tests + SRCS syscall_test.cpp + DEPENDS + libc.src.__support.OSUtil.osutil +) diff --git a/libc/test/config/linux/x86_64/syscall_test.cpp b/libc/test/src/__support/OSUtil/linux/x86_64/syscall_test.cpp similarity index 95% rename from libc/test/config/linux/x86_64/syscall_test.cpp rename to libc/test/src/__support/OSUtil/linux/x86_64/syscall_test.cpp index 971b571fce4d..2f9816e96829 100644 --- a/libc/test/config/linux/x86_64/syscall_test.cpp +++ b/libc/test/src/__support/OSUtil/linux/x86_64/syscall_test.cpp @@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "config/linux/syscall.h" -#include "utils/UnitTest/Test.h" - #include "src/__support/CPP/Functional.h" +#include "src/__support/OSUtil/syscall.h" +#include 
"utils/UnitTest/Test.h" TEST(LlvmLibcX86_64_SyscallTest, APITest) { // We only do a signature test here. Actual functionality tests are @@ -39,6 +38,6 @@ TEST(LlvmLibcX86_64_SyscallTest, APITest) { return __llvm_libc::syscall(n, a1, a2, a3, a4, a5, a6); }); - Function notLongType( + Function not_long_type( [](long n, void *a1) { return __llvm_libc::syscall(n, a1); }); } diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp index 5728185e54bb..e75434621157 100644 --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -14,201 +14,204 @@ class LlvmLibcStrToFloatTest : public __llvm_libc::testing::Test { public: template - void ClingerFastPathTest( + void clinger_fast_path_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10, const typename __llvm_libc::fputil::FPBits::UIntType expectedOutputMantissa, const uint32_t expectedOutputExp2) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; ASSERT_TRUE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actualOutputMantissa, &actualOutputExp2)); - EXPECT_EQ(actualOutputMantissa, expectedOutputMantissa); - EXPECT_EQ(actualOutputExp2, expectedOutputExp2); + inputMantissa, inputExp10, &actual_output_mantissa, + &actual_output_exp2)); + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); + EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } template - void ClingerFastPathFailsTest( + void clinger_fast_path_fails_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; ASSERT_FALSE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actualOutputMantissa, &actualOutputExp2)); + inputMantissa, inputExp10, &actual_output_mantissa, + &actual_output_exp2)); } template - void EiselLemireTest( + void eisel_lemire_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10, const typename __llvm_libc::fputil::FPBits::UIntType expectedOutputMantissa, const uint32_t expectedOutputExp2) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; ASSERT_TRUE(__llvm_libc::internal::eisel_lemire( - inputMantissa, inputExp10, &actualOutputMantissa, &actualOutputExp2)); - EXPECT_EQ(actualOutputMantissa, expectedOutputMantissa); - EXPECT_EQ(actualOutputExp2, expectedOutputExp2); + inputMantissa, inputExp10, &actual_output_mantissa, + &actual_output_exp2)); + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); + EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } template - void SimpleDecimalConversionTest( + void simple_decimal_conversion_test( const char *__restrict numStart, const typename __llvm_libc::fputil::FPBits::UIntType expectedOutputMantissa, const uint32_t expectedOutputExp2, const int expectedErrno = 0) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename 
__llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; errno = 0; __llvm_libc::internal::simple_decimal_conversion( - numStart, &actualOutputMantissa, &actualOutputExp2); - EXPECT_EQ(actualOutputMantissa, expectedOutputMantissa); - EXPECT_EQ(actualOutputExp2, expectedOutputExp2); + numStart, &actual_output_mantissa, &actual_output_exp2); + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); + EXPECT_EQ(actual_output_exp2, expectedOutputExp2); EXPECT_EQ(errno, expectedErrno); } }; TEST(LlvmLibcStrToFloatTest, LeadingZeroes) { - uint64_t testNum64 = 1; - uint32_t numOfZeroes = 63; + uint64_t test_num64 = 1; + uint32_t num_of_zeroes = 63; EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0), 64u); - for (; numOfZeroes < 64; testNum64 <<= 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum64), - numOfZeroes); + for (; num_of_zeroes < 64; test_num64 <<= 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num64), + num_of_zeroes); } - testNum64 = 3; - numOfZeroes = 62; - for (; numOfZeroes > 63; testNum64 <<= 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum64), - numOfZeroes); + test_num64 = 3; + num_of_zeroes = 62; + for (; num_of_zeroes > 63; test_num64 <<= 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num64), + num_of_zeroes); } EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0xffffffffffffffff), 0u); - testNum64 = 1; - numOfZeroes = 63; - for (; numOfZeroes > 63; testNum64 = (testNum64 << 1) + 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum64), - numOfZeroes); + test_num64 = 1; + num_of_zeroes = 63; + for (; num_of_zeroes > 63; + test_num64 = (test_num64 << 1) + 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num64), + num_of_zeroes); } - uint64_t testNum32 = 1; - numOfZeroes = 31; + uint64_t test_num32 = 1; + num_of_zeroes = 31; EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0), 32u); - for (; numOfZeroes < 32; testNum32 <<= 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum32), - numOfZeroes); + for (; num_of_zeroes < 32; test_num32 <<= 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num32), + num_of_zeroes); } EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0xffffffff), 0u); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat64Simple) { - ClingerFastPathTest(123, 0, 0xEC00000000000, 1029); - ClingerFastPathTest(1234567890123456, 1, 0x5ee2a2eb5a5c0, 1076); - ClingerFastPathTest(1234567890, -10, 0xf9add3739635f, 1019); + clinger_fast_path_test(123, 0, 0xEC00000000000, 1029); + clinger_fast_path_test(1234567890123456, 1, 0x5ee2a2eb5a5c0, 1076); + clinger_fast_path_test(1234567890, -10, 0xf9add3739635f, 1019); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat64ExtendedExp) { - ClingerFastPathTest(1, 30, 0x93e5939a08cea, 1122); - ClingerFastPathTest(1, 37, 0xe17b84357691b, 1145); - ClingerFastPathFailsTest(10, 37); - ClingerFastPathFailsTest(1, 100); + clinger_fast_path_test(1, 30, 0x93e5939a08cea, 1122); + clinger_fast_path_test(1, 37, 0xe17b84357691b, 1145); + clinger_fast_path_fails_test(10, 37); + clinger_fast_path_fails_test(1, 100); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat64NegativeExp) { - ClingerFastPathTest(1, -10, 0xb7cdfd9d7bdbb, 989); - ClingerFastPathTest(1, -20, 0x79ca10c924223, 956); - ClingerFastPathFailsTest(1, -25); + clinger_fast_path_test(1, -10, 0xb7cdfd9d7bdbb, 989); + 
clinger_fast_path_test(1, -20, 0x79ca10c924223, 956); + clinger_fast_path_fails_test(1, -25); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat32Simple) { - ClingerFastPathTest(123, 0, 0x760000, 133); - ClingerFastPathTest(1234567, 1, 0x3c6146, 150); - ClingerFastPathTest(12345, -5, 0x7cd35b, 123); + clinger_fast_path_test(123, 0, 0x760000, 133); + clinger_fast_path_test(1234567, 1, 0x3c6146, 150); + clinger_fast_path_test(12345, -5, 0x7cd35b, 123); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat32ExtendedExp) { - ClingerFastPathTest(1, 15, 0x635fa9, 176); - ClingerFastPathTest(1, 17, 0x31a2bc, 183); - ClingerFastPathFailsTest(10, 17); - ClingerFastPathFailsTest(1, 50); + clinger_fast_path_test(1, 15, 0x635fa9, 176); + clinger_fast_path_test(1, 17, 0x31a2bc, 183); + clinger_fast_path_fails_test(10, 17); + clinger_fast_path_fails_test(1, 50); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat32NegativeExp) { - ClingerFastPathTest(1, -5, 0x27c5ac, 110); - ClingerFastPathTest(1, -10, 0x5be6ff, 93); - ClingerFastPathFailsTest(1, -15); + clinger_fast_path_test(1, -5, 0x27c5ac, 110); + clinger_fast_path_test(1, -10, 0x5be6ff, 93); + clinger_fast_path_fails_test(1, -15); } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64Simple) { - EiselLemireTest(12345678901234567890u, 1, 0x1AC53A7E04BCDA, 1089); - EiselLemireTest(123, 0, 0x1EC00000000000, 1029); - EiselLemireTest(12345678901234568192u, 0, 0x156A95319D63E2, 1086); + eisel_lemire_test(12345678901234567890u, 1, 0x1AC53A7E04BCDA, 1089); + eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); + eisel_lemire_test(12345678901234568192u, 0, 0x156A95319D63E2, 1086); } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64SpecificFailures) { // These test cases have caused failures in the past. - EiselLemireTest(358416272, -33, 0x1BBB2A68C9D0B9, 941); - EiselLemireTest(2166568064000000238u, -9, 0x10246690000000, 1054); - EiselLemireTest(2794967654709307187u, 1, 0x183e132bc608c8, 1087); - EiselLemireTest(2794967654709307188u, 1, 0x183e132bc608c9, 1087); + eisel_lemire_test(358416272, -33, 0x1BBB2A68C9D0B9, 941); + eisel_lemire_test(2166568064000000238u, -9, 0x10246690000000, 1054); + eisel_lemire_test(2794967654709307187u, 1, 0x183e132bc608c8, 1087); + eisel_lemire_test(2794967654709307188u, 1, 0x183e132bc608c9, 1087); } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFallbackStates) { // Check the fallback states for the algorithm: - uint32_t floatOutputMantissa = 0; - uint64_t doubleOutputMantissa = 0; - __uint128_t tooLongMantissa = 0; - uint32_t outputExp2 = 0; - - // This Eisel-Lemire implementation doesn't support long doubles yet. 
- ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - tooLongMantissa, 0, &tooLongMantissa, &outputExp2)); + uint32_t float_output_mantissa = 0; + uint64_t double_output_mantissa = 0; + uint32_t output_exp2 = 0; // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away // from both of its closest floating point approximations // (12345678901234548736 and 12345678901234550784) ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 12345678901234549760u, 0, &doubleOutputMantissa, &outputExp2)); + 12345678901234549760u, 0, &double_output_mantissa, &output_exp2)); ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 20040229, 0, &floatOutputMantissa, &outputExp2)); + 20040229, 0, &float_output_mantissa, &output_exp2)); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicWholeNumbers) { - SimpleDecimalConversionTest("123456789012345678900", 0x1AC53A7E04BCDA, - 1089); - SimpleDecimalConversionTest("123", 0x1EC00000000000, 1029); - SimpleDecimalConversionTest("12345678901234549760", 0x156A95319D63D8, - 1086); + simple_decimal_conversion_test("123456789012345678900", + 0x1AC53A7E04BCDA, 1089); + simple_decimal_conversion_test("123", 0x1EC00000000000, 1029); + simple_decimal_conversion_test("12345678901234549760", + 0x156A95319D63D8, 1086); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicDecimals) { - SimpleDecimalConversionTest("1.2345", 0x13c083126e978d, 1023); - SimpleDecimalConversionTest(".2345", 0x1e04189374bc6a, 1020); - SimpleDecimalConversionTest(".299792458", 0x132fccb4aca314, 1021); + simple_decimal_conversion_test("1.2345", 0x13c083126e978d, 1023); + simple_decimal_conversion_test(".2345", 0x1e04189374bc6a, 1020); + simple_decimal_conversion_test(".299792458", 0x132fccb4aca314, 1021); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicExponents) { - SimpleDecimalConversionTest("1e10", 0x12a05f20000000, 1056); - SimpleDecimalConversionTest("1e-10", 0x1b7cdfd9d7bdbb, 989); - SimpleDecimalConversionTest("1e300", 0x17e43c8800759c, 2019); - SimpleDecimalConversionTest("1e-300", 0x156e1fc2f8f359, 26); + simple_decimal_conversion_test("1e10", 0x12a05f20000000, 1056); + simple_decimal_conversion_test("1e-10", 0x1b7cdfd9d7bdbb, 989); + simple_decimal_conversion_test("1e300", 0x17e43c8800759c, 2019); + simple_decimal_conversion_test("1e-300", 0x156e1fc2f8f359, 26); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicSubnormals) { - SimpleDecimalConversionTest("1e-320", 0x7e8, 0, ERANGE); - SimpleDecimalConversionTest("1e-308", 0x730d67819e8d2, 0, ERANGE); - SimpleDecimalConversionTest("2.9e-308", 0x14da6df5e4bcc8, 1); + simple_decimal_conversion_test("1e-320", 0x7e8, 0, ERANGE); + simple_decimal_conversion_test("1e-308", 0x730d67819e8d2, 0, ERANGE); + simple_decimal_conversion_test("2.9e-308", 0x14da6df5e4bcc8, 1); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64SubnormalRounding) { @@ -216,40 +219,40 @@ TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64SubnormalRounding) { // Technically you can keep adding digits until you hit the truncation limit, // but this is the shortest string that results in the maximum subnormal that // I found. - SimpleDecimalConversionTest("2.225073858507201e-308", 0xfffffffffffff, - 0, ERANGE); + simple_decimal_conversion_test("2.225073858507201e-308", + 0xfffffffffffff, 0, ERANGE); // Same here, if you were to extend the max subnormal out for another 800 // digits, incrementing any one of those digits would create a normal number. 
- SimpleDecimalConversionTest("2.2250738585072012e-308", - 0x10000000000000, 1); + simple_decimal_conversion_test("2.2250738585072012e-308", + 0x10000000000000, 1); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion32SpecificFailures) { - SimpleDecimalConversionTest( + simple_decimal_conversion_test( "1.4012984643248170709237295832899161312802619418765e-45", 0x1, 0, ERANGE); } TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) { - uint32_t floatOutputMantissa = 0; - uint32_t outputExp2 = 0; + uint32_t float_output_mantissa = 0; + uint32_t output_exp2 = 0; errno = 0; __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &floatOutputMantissa, &outputExp2); - EXPECT_EQ(floatOutputMantissa, uint32_t(0xd629d4)); - EXPECT_EQ(outputExp2, uint32_t(193)); + "123456789012345678900", &float_output_mantissa, &output_exp2); + EXPECT_EQ(float_output_mantissa, uint32_t(0xd629d4)); + EXPECT_EQ(output_exp2, uint32_t(193)); EXPECT_EQ(errno, 0); - uint64_t doubleOutputMantissa = 0; - outputExp2 = 0; + uint64_t double_output_mantissa = 0; + output_exp2 = 0; errno = 0; __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &doubleOutputMantissa, &outputExp2); - EXPECT_EQ(doubleOutputMantissa, uint64_t(0x1AC53A7E04BCDA)); - EXPECT_EQ(outputExp2, uint32_t(1089)); + "123456789012345678900", &double_output_mantissa, &output_exp2); + EXPECT_EQ(double_output_mantissa, uint64_t(0x1AC53A7E04BCDA)); + EXPECT_EQ(output_exp2, uint32_t(1089)); EXPECT_EQ(errno, 0); // TODO(michaelrj): Get long double support working. @@ -264,3 +267,92 @@ TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) { // EXPECT_EQ(outputExp2, uint32_t(1089)); // EXPECT_EQ(errno, 0); } + +#if defined(LONG_DOUBLE_IS_DOUBLE) +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64AsLongDouble) { + eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); +} +#elif defined(SPECIAL_X86_LONG_DOUBLE) +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Simple) { + eisel_lemire_test(123, 0, 0xf600000000000000, 16389); + eisel_lemire_test(12345678901234568192u, 0, 0xab54a98ceb1f0c00, + 16446); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80LongerMantissa) { + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + 0, 0x91a2b3c091a2b3c1, 16507); + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + 300, 0xd97757de56adb65c, 17503); + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + -300, 0xc30feb9a7618457d, 15510); +} + +// These tests check numbers at the edge of the DETAILED_POWERS_OF_TEN table. +// This doesn't reach very far into the range for long doubles, since it's sized +// for doubles and their 11 exponent bits, and not for long doubles and their +// 15 exponent bits. This is a known tradeoff, and was made because a proper +// long double table would be approximately 16 times longer (specifically the +// maximum exponent would need to be about 5000, leading to a 10,000 entry +// table). This would have significant memory and storage costs all the time to +// speed up a relatively uncommon path. 
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80TableLimits) { + eisel_lemire_test(1, 347, 0xd13eb46469447567, 17535); + eisel_lemire_test(1, -348, 0xfa8fd5a0081c0288, 15226); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Fallback) { + uint32_t outputExp2 = 0; + __uint128_t quadOutputMantissa = 0; + + // This number is halfway between two possible results, and the algorithm + // can't determine which is correct. + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 12345678901234567890u, 1, &quadOutputMantissa, &outputExp2)); + + // These numbers' exponents are out of range for the current powers of ten + // table. + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 1, 1000, &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 1, -1000, &quadOutputMantissa, &outputExp2)); +} +#else +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Simple) { + eisel_lemire_test(123, 0, (__uint128_t(0x1ec0000000000) << 64), + 16389); + eisel_lemire_test(12345678901234568192u, 0, + (__uint128_t(0x156a95319d63e) << 64) + + __uint128_t(0x1800000000000000), + 16446); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128LongerMantissa) { + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + 0, (__uint128_t(0x1234567812345) << 64) + __uint128_t(0x6781234567812345), + 16507); + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + 300, + (__uint128_t(0x1b2eeafbcad5b) << 64) + __uint128_t(0x6cb8b4451dfcde19), + 17503); + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + -300, + (__uint128_t(0x1861fd734ec30) << 64) + __uint128_t(0x8afa7189f0f7595f), + 15510); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Fallback) { + uint32_t outputExp2 = 0; + __uint128_t quadOutputMantissa = 0; + + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + (__uint128_t(0x5ce0e9a56015fec5) << 64) + __uint128_t(0xaadfa328ae39b333), + 1, &quadOutputMantissa, &outputExp2)); +} +#endif diff --git a/libc/test/src/fenv/enabled_exceptions_test.cpp b/libc/test/src/fenv/enabled_exceptions_test.cpp index acc3b09d4aca..75463c34898a 100644 --- a/libc/test/src/fenv/enabled_exceptions_test.cpp +++ b/libc/test/src/fenv/enabled_exceptions_test.cpp @@ -46,7 +46,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndCrash) { // We '|' the individual exception flags instead of using FE_ALL_EXCEPT // as it can include non-standard extensions. Note that we should be able // to compile this file with headers from other libcs as well. - constexpr int allExcepts = + constexpr int ALL_EXCEPTS = FE_DIVBYZERO | FE_INVALID | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW; for (int e : excepts) { @@ -59,7 +59,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndCrash) { // can raise FE_INEXACT as well, we don't verify the other // exception flags when FE_INEXACT is enabled. 
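The check that follows applies the complement mask only when the raised exception is not FE_INEXACT, because raising FE_OVERFLOW or FE_UNDERFLOW can set FE_INEXACT as a side effect. The same raise-then-test pattern, restated against plain <cfenv> names rather than the __llvm_libc entry points and assuming the default non-trapping environment, purely as a sketch of what the renamed ALL_EXCEPTS mask is used for:

#include <cassert>
#include <cfenv>

// Raise every standard exception except FE_INEXACT, then verify that at least
// those flags are set (extra flags such as FE_INEXACT are tolerated).
void raise_all_but_inexact() {
  constexpr int ALL_EXCEPTS =
      FE_DIVBYZERO | FE_INVALID | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW;
  std::feclearexcept(FE_ALL_EXCEPT);
  const int others = ALL_EXCEPTS & ~FE_INEXACT;
  std::feraiseexcept(others);
  assert((std::fetestexcept(FE_ALL_EXCEPT) & others) == others);
}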
if (e != FE_INEXACT) { - int others = allExcepts & ~e; + int others = ALL_EXCEPTS & ~e; ASSERT_EQ(__llvm_libc::feraiseexcept(others), 0); ASSERT_EQ(__llvm_libc::fetestexcept(others), others); } diff --git a/libc/test/src/fenv/exception_status_test.cpp b/libc/test/src/fenv/exception_status_test.cpp index a7c0164d370d..f897e65439f2 100644 --- a/libc/test/src/fenv/exception_status_test.cpp +++ b/libc/test/src/fenv/exception_status_test.cpp @@ -25,7 +25,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { int excepts[] = {FE_DIVBYZERO, FE_INVALID, FE_INEXACT, FE_OVERFLOW, FE_UNDERFLOW}; - constexpr int allExcepts = + constexpr int ALL_EXCEPTS = FE_DIVBYZERO | FE_INVALID | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW; for (int e : excepts) { @@ -112,8 +112,8 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { } } - int r = __llvm_libc::feraiseexcept(allExcepts); + int r = __llvm_libc::feraiseexcept(ALL_EXCEPTS); ASSERT_EQ(r, 0); - int s = __llvm_libc::fetestexcept(allExcepts); - ASSERT_EQ(s, allExcepts); + int s = __llvm_libc::fetestexcept(ALL_EXCEPTS); + ASSERT_EQ(s, ALL_EXCEPTS); } diff --git a/libc/test/src/math/CeilTest.h b/libc/test/src/math/CeilTest.h index a45b0d577356..64be0a316ef3 100644 --- a/libc/test/src/math/CeilTest.h +++ b/libc/test/src/math/CeilTest.h @@ -64,9 +64,9 @@ template class CeilTest : public __llvm_libc::testing::Test { } void testRange(CeilFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/CopySignTest.h b/libc/test/src/math/CopySignTest.h index 46d4c3781184..20f400197ceb 100644 --- a/libc/test/src/math/CopySignTest.h +++ b/libc/test/src/math/CopySignTest.h @@ -33,9 +33,9 @@ template class CopySignTest : public __llvm_libc::testing::Test { } void testRange(CopySignFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FAbsTest.h b/libc/test/src/math/FAbsTest.h index d4377f7110da..fe163f6ff672 100644 --- a/libc/test/src/math/FAbsTest.h +++ b/libc/test/src/math/FAbsTest.h @@ -32,9 +32,9 @@ template class FAbsTest : public __llvm_libc::testing::Test { } void testRange(FabsFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FDimTest.h b/libc/test/src/math/FDimTest.h index 029c6390f076..587913acb1f1 100644 --- a/libc/test/src/math/FDimTest.h +++ b/libc/test/src/math/FDimTest.h @@ -19,7 +19,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { using FPBits = __llvm_libc::fputil::FPBits; using UIntType = typename FPBits::UIntType; - void testNaNArg(FuncPtr func) { + void test_na_n_arg(FuncPtr func) { EXPECT_FP_EQ(nan, func(nan, inf)); 
EXPECT_FP_EQ(nan, func(neg_inf, nan)); EXPECT_FP_EQ(nan, func(nan, zero)); @@ -29,7 +29,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(func(nan, nan), nan); } - void testInfArg(FuncPtr func) { + void test_inf_arg(FuncPtr func) { EXPECT_FP_EQ(zero, func(neg_inf, inf)); EXPECT_FP_EQ(inf, func(inf, zero)); EXPECT_FP_EQ(zero, func(neg_zero, inf)); @@ -37,7 +37,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(zero, func(T(-1.2345), inf)); } - void testNegInfArg(FuncPtr func) { + void test_neg_inf_arg(FuncPtr func) { EXPECT_FP_EQ(inf, func(inf, neg_inf)); EXPECT_FP_EQ(zero, func(neg_inf, zero)); EXPECT_FP_EQ(inf, func(neg_zero, neg_inf)); @@ -45,18 +45,18 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(inf, func(T(1.2345), neg_inf)); } - void testBothZero(FuncPtr func) { + void test_both_zero(FuncPtr func) { EXPECT_FP_EQ(zero, func(zero, zero)); EXPECT_FP_EQ(zero, func(zero, neg_zero)); EXPECT_FP_EQ(zero, func(neg_zero, zero)); EXPECT_FP_EQ(zero, func(neg_zero, neg_zero)); } - void testInRange(FuncPtr func) { - constexpr UIntType count = 10000001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count; - ++i, v += step, w -= step) { + void test_in_range(FuncPtr func) { + constexpr UIntType COUNT = 10000001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0, w = UIntType(-1); i <= COUNT; + ++i, v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FMaxTest.h b/libc/test/src/math/FMaxTest.h index c72dad5062eb..4a0b2151a9cc 100644 --- a/libc/test/src/math/FMaxTest.h +++ b/libc/test/src/math/FMaxTest.h @@ -55,10 +55,10 @@ template class FMaxTest : public __llvm_libc::testing::Test { } void testRange(FMaxFunc func) { - constexpr UIntType count = 10000001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count; - ++i, v += step, w -= step) { + constexpr UIntType COUNT = 10000001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0, w = UIntType(-1); i <= COUNT; + ++i, v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FMinTest.h b/libc/test/src/math/FMinTest.h index a7491a063fd0..a5d62622b205 100644 --- a/libc/test/src/math/FMinTest.h +++ b/libc/test/src/math/FMinTest.h @@ -55,10 +55,10 @@ template class FMinTest : public __llvm_libc::testing::Test { } void testRange(FMinFunc func) { - constexpr UIntType count = 10000001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count; - ++i, v += step, w -= step) { + constexpr UIntType COUNT = 10000001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0, w = UIntType(-1); i <= COUNT; + ++i, v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FloorTest.h b/libc/test/src/math/FloorTest.h index b52f8a1a4303..0a3d5a3994aa 100644 --- a/libc/test/src/math/FloorTest.h +++ b/libc/test/src/math/FloorTest.h @@ -64,9 +64,9 @@ template class FloorTest : public __llvm_libc::testing::Test { } void testRange(FloorFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + 
constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 30af38f3cee3..2d04989400f4 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -29,7 +29,7 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { const T zero = T(__llvm_libc::fputil::FPBits::zero()); const T neg_zero = T(__llvm_libc::fputil::FPBits::neg_zero()); - UIntType getRandomBitPattern() { + UIntType get_random_bit_pattern() { UIntType bits{0}; for (UIntType i = 0; i < sizeof(UIntType) / 2; ++i) { bits = @@ -39,7 +39,7 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { } public: - void testSpecialNumbers(Func func) { + void test_special_numbers(Func func) { EXPECT_FP_EQ(func(zero, zero, zero), zero); EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero); EXPECT_FP_EQ(func(inf, inf, zero), inf); @@ -63,14 +63,14 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); } - void testSubnormalRange(Func func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + void test_subnormal_range(Func func) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType v = FPBits::MIN_SUBNORMAL, w = FPBits::MAX_SUBNORMAL; v <= FPBits::MAX_SUBNORMAL && w >= FPBits::MIN_SUBNORMAL; - v += step, w -= step) { - T x = T(FPBits(getRandomBitPattern())), y = T(FPBits(v)), + v += STEP, w -= STEP) { + T x = T(FPBits(get_random_bit_pattern())), y = T(FPBits(v)), z = T(FPBits(w)); T result = func(x, y, z); mpfr::TernaryInput input{x, y, z}; @@ -78,14 +78,14 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { } } - void testNormalRange(Func func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; + void test_normal_range(Func func) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL; v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)), - z = T(FPBits(getRandomBitPattern())); + z = T(FPBits(get_random_bit_pattern())); T result = func(x, y, z); mpfr::TernaryInput input{x, y, z}; ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5); diff --git a/libc/test/src/math/FrexpTest.h b/libc/test/src/math/FrexpTest.h index 6ccf0b9bac8a..06789f4ebcc2 100644 --- a/libc/test/src/math/FrexpTest.h +++ b/libc/test/src/math/FrexpTest.h @@ -19,7 +19,7 @@ template class FrexpTest : public __llvm_libc::testing::Test { DECLARE_SPECIAL_CONSTANTS(T) - static constexpr UIntType HiddenBit = + static constexpr UIntType HIDDEN_BIT = UIntType(1) << __llvm_libc::fputil::MantissaWidth::VALUE; public: @@ -93,9 +93,9 @@ template class FrexpTest : public __llvm_libc::testing::Test { void testRange(FrexpFunc func) { using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x 
= static_cast(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0l) continue; diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h index 962da0b39e06..584b5aef3359 100644 --- a/libc/test/src/math/HypotTest.h +++ b/libc/test/src/math/HypotTest.h @@ -32,7 +32,7 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { const T neg_zero = T(__llvm_libc::fputil::FPBits::neg_zero()); public: - void testSpecialNumbers(Func func) { + void test_special_numbers(Func func) { EXPECT_FP_EQ(func(inf, nan), inf); EXPECT_FP_EQ(func(nan, neg_inf), inf); EXPECT_FP_EQ(func(zero, inf), inf); @@ -45,14 +45,14 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(func(neg_zero, zero), zero); } - void testSubnormalRange(Func func) { - constexpr UIntType count = 1000001; + void test_subnormal_range(Func func) { + constexpr UIntType COUNT = 1000001; for (unsigned scale = 0; scale < 4; ++scale) { - UIntType maxValue = FPBits::MAX_SUBNORMAL << scale; - UIntType step = (maxValue - FPBits::MIN_SUBNORMAL) / count; + UIntType max_value = FPBits::MAX_SUBNORMAL << scale; + UIntType step = (max_value - FPBits::MIN_SUBNORMAL) / COUNT; for (int signs = 0; signs < 4; ++signs) { - for (UIntType v = FPBits::MIN_SUBNORMAL, w = maxValue; - v <= maxValue && w >= FPBits::MIN_SUBNORMAL; + for (UIntType v = FPBits::MIN_SUBNORMAL, w = max_value; + v <= max_value && w >= FPBits::MIN_SUBNORMAL; v += step, w -= step) { T x = T(FPBits(v)), y = T(FPBits(w)); if (signs % 2 == 1) { @@ -70,13 +70,13 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { } } - void testNormalRange(Func func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; + void test_normal_range(Func func) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; for (int signs = 0; signs < 4; ++signs) { for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL; v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (signs % 2 == 1) { x = -x; diff --git a/libc/test/src/math/ILogbTest.h b/libc/test/src/math/ILogbTest.h index 8d8f4e9a6716..e2de14de446d 100644 --- a/libc/test/src/math/ILogbTest.h +++ b/libc/test/src/math/ILogbTest.h @@ -21,7 +21,7 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { template struct ILogbFunc { typedef int (*Func)(T); }; template - void testSpecialNumbers(typename ILogbFunc::Func func) { + void test_special_numbers(typename ILogbFunc::Func func) { EXPECT_EQ(FP_ILOGB0, func(T(__llvm_libc::fputil::FPBits::zero()))); EXPECT_EQ(FP_ILOGB0, func(T(__llvm_libc::fputil::FPBits::neg_zero()))); @@ -32,7 +32,8 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { EXPECT_EQ(INT_MAX, func(T(__llvm_libc::fputil::FPBits::neg_inf()))); } - template void testPowersOfTwo(typename ILogbFunc::Func func) { + template + void test_powers_of_two(typename ILogbFunc::Func func) { EXPECT_EQ(0, func(T(1.0))); EXPECT_EQ(0, func(T(-1.0))); @@ -53,7 +54,7 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { } template - void testSomeIntegers(typename ILogbFunc::Func func) { + void test_some_integers(typename ILogbFunc::Func func) { EXPECT_EQ(1, func(T(3.0))); EXPECT_EQ(1, func(T(-3.0))); @@ -71,14 +72,14 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { } template - void testSubnormalRange(typename ILogbFunc::Func func) { + void 
test_subnormal_range(typename ILogbFunc::Func func) { using FPBits = __llvm_libc::fputil::FPBits; using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType v = FPBits::MIN_SUBNORMAL; v <= FPBits::MAX_SUBNORMAL; - v += step) { + v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0) continue; @@ -89,12 +90,13 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { } } - template void testNormalRange(typename ILogbFunc::Func func) { + template + void test_normal_range(typename ILogbFunc::Func func) { using FPBits = __llvm_libc::fputil::FPBits; using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; - for (UIntType v = FPBits::MIN_NORMAL; v <= FPBits::MAX_NORMAL; v += step) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; + for (UIntType v = FPBits::MIN_NORMAL; v <= FPBits::MAX_NORMAL; v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0) continue; diff --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h index cd6491d2b71e..ca9f33f2f816 100644 --- a/libc/test/src/math/LdExpTest.h +++ b/libc/test/src/math/LdExpTest.h @@ -23,10 +23,10 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { using FPBits = __llvm_libc::fputil::FPBits; using NormalFloat = __llvm_libc::fputil::NormalFloat; using UIntType = typename FPBits::UIntType; - static constexpr UIntType mantissaWidth = + static constexpr UIntType MANTISSA_WIDTH = __llvm_libc::fputil::MantissaWidth::VALUE; // A normalized mantissa to be used with tests. - static constexpr UIntType mantissa = NormalFloat::ONE + 0x1234; + static constexpr UIntType MANTISSA = NormalFloat::ONE + 0x1234; const T zero = T(__llvm_libc::fputil::FPBits::zero()); const T neg_zero = T(__llvm_libc::fputil::FPBits::neg_zero()); @@ -38,8 +38,8 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { typedef T (*LdExpFunc)(T, int); void testSpecialNumbers(LdExpFunc func) { - int expArray[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX}; - for (int exp : expArray) { + int exp_array[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX}; + for (int exp : exp_array) { ASSERT_FP_EQ(zero, func(zero, exp)); ASSERT_FP_EQ(neg_zero, func(neg_zero, exp)); ASSERT_FP_EQ(inf, func(inf, exp)); @@ -49,10 +49,10 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { } void testPowersOfTwo(LdExpFunc func) { - int32_t expArray[5] = {1, 2, 3, 4, 5}; - int32_t valArray[6] = {1, 2, 4, 8, 16, 32}; - for (int32_t exp : expArray) { - for (int32_t val : valArray) { + int32_t exp_array[5] = {1, 2, 3, 4, 5}; + int32_t val_array[6] = {1, 2, 4, 8, 16, 32}; + for (int32_t exp : exp_array) { + for (int32_t val : val_array) { ASSERT_FP_EQ(T(val << exp), func(T(val), exp)); ASSERT_FP_EQ(T(-1 * (val << exp)), func(T(-val), exp)); } @@ -70,11 +70,12 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { void testUnderflowToZeroOnNormal(LdExpFunc func) { // In this test, we pass a normal nubmer to func and expect zero // to be returned due to underflow. 
- int32_t baseExponent = FPBits::EXPONENT_BIAS + mantissaWidth; - int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3, - baseExponent + 2, baseExponent + 1}; - T x = NormalFloat(0, mantissa, 0); - for (int32_t exp : expArray) { + int32_t base_exponent = FPBits::EXPONENT_BIAS + MANTISSA_WIDTH; + int32_t exp_array[] = {base_exponent + 5, base_exponent + 4, + base_exponent + 3, base_exponent + 2, + base_exponent + 1}; + T x = NormalFloat(0, MANTISSA, 0); + for (int32_t exp : exp_array) { ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : neg_zero); } } @@ -82,25 +83,26 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { void testUnderflowToZeroOnSubnormal(LdExpFunc func) { // In this test, we pass a normal nubmer to func and expect zero // to be returned due to underflow. - int32_t baseExponent = FPBits::EXPONENT_BIAS + mantissaWidth; - int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3, - baseExponent + 2, baseExponent + 1}; - T x = NormalFloat(-FPBits::EXPONENT_BIAS, mantissa, 0); - for (int32_t exp : expArray) { + int32_t base_exponent = FPBits::EXPONENT_BIAS + MANTISSA_WIDTH; + int32_t exp_array[] = {base_exponent + 5, base_exponent + 4, + base_exponent + 3, base_exponent + 2, + base_exponent + 1}; + T x = NormalFloat(-FPBits::EXPONENT_BIAS, MANTISSA, 0); + for (int32_t exp : exp_array) { ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : neg_zero); } } void testNormalOperation(LdExpFunc func) { - T valArray[] = { + T val_array[] = { // Normal numbers - NormalFloat(100, mantissa, 0), NormalFloat(-100, mantissa, 0), - NormalFloat(100, mantissa, 1), NormalFloat(-100, mantissa, 1), + NormalFloat(100, MANTISSA, 0), NormalFloat(-100, MANTISSA, 0), + NormalFloat(100, MANTISSA, 1), NormalFloat(-100, MANTISSA, 1), // Subnormal numbers - NormalFloat(-FPBits::EXPONENT_BIAS, mantissa, 0), - NormalFloat(-FPBits::EXPONENT_BIAS, mantissa, 1)}; - for (int32_t exp = 0; exp <= static_cast(mantissaWidth); ++exp) { - for (T x : valArray) { + NormalFloat(-FPBits::EXPONENT_BIAS, MANTISSA, 0), + NormalFloat(-FPBits::EXPONENT_BIAS, MANTISSA, 1)}; + for (int32_t exp = 0; exp <= static_cast(MANTISSA_WIDTH); ++exp) { + for (T x : val_array) { // We compare the result of ldexp with the result // of the native multiplication/division instruction. ASSERT_FP_EQ(func(x, exp), x * (UIntType(1) << exp)); @@ -118,13 +120,13 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { x = NormalFloat(FPBits::EXPONENT_BIAS, NormalFloat::ONE, 0); int exp = -FPBits::MAX_EXPONENT - 5; T result = func(x, exp); - FPBits resultBits(result); - ASSERT_FALSE(resultBits.is_zero()); + FPBits result_bits(result); + ASSERT_FALSE(result_bits.is_zero()); // Verify that the result is indeed subnormal. - ASSERT_EQ(resultBits.get_unbiased_exponent(), uint16_t(0)); + ASSERT_EQ(result_bits.get_unbiased_exponent(), uint16_t(0)); // But if the exp is so less that normalization leads to zero, then // the result should be zero. - result = func(x, -FPBits::MAX_EXPONENT - int(mantissaWidth) - 5); + result = func(x, -FPBits::MAX_EXPONENT - int(MANTISSA_WIDTH) - 5); ASSERT_TRUE(FPBits(result).is_zero()); // Start with a subnormal number but pass a very high number for exponent. 
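The normal-operation checks above compare ldexp against a native multiplication by a power of two, which is exact as long as the scaled value stays in range. The same invariant restated with std::ldexp, only to make explicit what ASSERT_FP_EQ(func(x, exp), x * (UIntType(1) << exp)) asserts; the test itself drives the __llvm_libc entry points with NormalFloat-built operands:

#include <cassert>
#include <cmath>
#include <cstdint>

// Scaling by 2^e neither rounds nor loses precision while the result stays in
// the normal range, so ldexp must match a plain multiply exactly.
void ldexp_matches_scaling() {
  const double x = 1.2345;
  for (int e = 0; e <= 52; ++e)
    assert(std::ldexp(x, e) == x * static_cast<double>(uint64_t(1) << e));
}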
diff --git a/libc/test/src/math/LogbTest.h b/libc/test/src/math/LogbTest.h index 6e7e6d5bda06..49ccf3a1a832 100644 --- a/libc/test/src/math/LogbTest.h +++ b/libc/test/src/math/LogbTest.h @@ -19,7 +19,7 @@ template class LogbTest : public __llvm_libc::testing::Test { DECLARE_SPECIAL_CONSTANTS(T) - static constexpr UIntType HiddenBit = + static constexpr UIntType HIDDEN_BIT = UIntType(1) << __llvm_libc::fputil::MantissaWidth::VALUE; public: @@ -72,9 +72,9 @@ template class LogbTest : public __llvm_libc::testing::Test { void testRange(LogbFunc func) { using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = static_cast(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0l) continue; diff --git a/libc/test/src/math/ModfTest.h b/libc/test/src/math/ModfTest.h index a3f0c4108207..4e88c6ae3654 100644 --- a/libc/test/src/math/ModfTest.h +++ b/libc/test/src/math/ModfTest.h @@ -84,9 +84,9 @@ template class ModfTest : public __llvm_libc::testing::Test { } void testRange(ModfFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x) || x == T(0.0)) continue; diff --git a/libc/test/src/math/NextAfterTest.h b/libc/test/src/math/NextAfterTest.h index 4b7b5f4997ee..201d16433704 100644 --- a/libc/test/src/math/NextAfterTest.h +++ b/libc/test/src/math/NextAfterTest.h @@ -22,7 +22,7 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { using MantissaWidth = __llvm_libc::fputil::MantissaWidth; using UIntType = typename FPBits::UIntType; - static constexpr int bitWidthOfType = + static constexpr int BIT_WIDTH_OF_TYPE = __llvm_libc::fputil::FloatProperties::BIT_WIDTH; const T zero = T(FPBits::zero()); @@ -30,10 +30,10 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { const T inf = T(FPBits::inf()); const T neg_inf = T(FPBits::neg_inf()); const T nan = T(FPBits::build_nan(1)); - const UIntType MIN_SUBNORMAL = FPBits::MIN_SUBNORMAL; - const UIntType MAX_SUBNORMAL = FPBits::MAX_SUBNORMAL; - const UIntType MIN_NORMAL = FPBits::MIN_NORMAL; - const UIntType MAX_NORMAL = FPBits::MAX_NORMAL; + const UIntType min_subnormal = FPBits::MIN_SUBNORMAL; + const UIntType max_subnormal = FPBits::MAX_SUBNORMAL; + const UIntType min_normal = FPBits::MIN_NORMAL; + const UIntType max_normal = FPBits::MAX_NORMAL; public: typedef T (*NextAfterFunc)(T, T); @@ -50,89 +50,91 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { // 'from' is zero|neg_zero. 
T x = zero; T result = func(x, T(1)); - UIntType expectedBits = 1; - T expected = *reinterpret_cast(&expectedBits); + UIntType expected_bits = 1; + T expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, T(-1)); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); x = neg_zero; result = func(x, 1); - expectedBits = 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, -1); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); // 'from' is max subnormal value. - x = *reinterpret_cast(&MAX_SUBNORMAL); + x = *reinterpret_cast(&max_subnormal); result = func(x, 1); - expected = *reinterpret_cast(&MIN_NORMAL); + expected = *reinterpret_cast(&min_normal); ASSERT_FP_EQ(result, expected); result = func(x, 0); - expectedBits = MAX_SUBNORMAL - 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = max_subnormal - 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); x = -x; result = func(x, -1); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MIN_NORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + min_normal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, 0); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MAX_SUBNORMAL - 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = + (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + max_subnormal - 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); // 'from' is min subnormal value. - x = *reinterpret_cast(&MIN_SUBNORMAL); + x = *reinterpret_cast(&min_subnormal); result = func(x, 1); - expectedBits = MIN_SUBNORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = min_subnormal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, 0), 0); x = -x; result = func(x, -1); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MIN_SUBNORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = + (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + min_subnormal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, 0), T(-0.0)); // 'from' is min normal. 
- x = *reinterpret_cast(&MIN_NORMAL); + x = *reinterpret_cast(&min_normal); result = func(x, 0); - expectedBits = MAX_SUBNORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = max_subnormal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, inf); - expectedBits = MIN_NORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = min_normal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); x = -x; result = func(x, 0); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MAX_SUBNORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + max_subnormal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, -inf); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MIN_NORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + min_normal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); // 'from' is max normal and 'to' is infinity. - x = *reinterpret_cast(&MAX_NORMAL); + x = *reinterpret_cast(&max_normal); result = func(x, inf); ASSERT_FP_EQ(result, inf); @@ -142,48 +144,48 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { // 'from' is infinity. x = inf; result = func(x, 0); - expectedBits = MAX_NORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = max_normal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, inf), inf); x = neg_inf; result = func(x, 0); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MAX_NORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + max_normal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, neg_inf), neg_inf); // 'from' is a power of 2. 
x = T(32.0); result = func(x, 0); - FPBits xBits = FPBits(x); - FPBits resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - uint16_t(xBits.get_unbiased_exponent() - 1)); - ASSERT_EQ(resultBits.get_mantissa(), + FPBits x_bits = FPBits(x); + FPBits result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + uint16_t(x_bits.get_unbiased_exponent() - 1)); + ASSERT_EQ(result_bits.get_mantissa(), (UIntType(1) << MantissaWidth::VALUE) - 1); result = func(x, T(33.0)); - resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - xBits.get_unbiased_exponent()); - ASSERT_EQ(resultBits.get_mantissa(), xBits.get_mantissa() + UIntType(1)); + result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + x_bits.get_unbiased_exponent()); + ASSERT_EQ(result_bits.get_mantissa(), x_bits.get_mantissa() + UIntType(1)); x = -x; result = func(x, 0); - resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - uint16_t(xBits.get_unbiased_exponent() - 1)); - ASSERT_EQ(resultBits.get_mantissa(), + result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + uint16_t(x_bits.get_unbiased_exponent() - 1)); + ASSERT_EQ(result_bits.get_mantissa(), (UIntType(1) << MantissaWidth::VALUE) - 1); result = func(x, T(-33.0)); - resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - xBits.get_unbiased_exponent()); - ASSERT_EQ(resultBits.get_mantissa(), xBits.get_mantissa() + UIntType(1)); + result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + x_bits.get_unbiased_exponent()); + ASSERT_EQ(result_bits.get_mantissa(), x_bits.get_mantissa() + UIntType(1)); } }; diff --git a/libc/test/src/math/RIntTest.h b/libc/test/src/math/RIntTest.h index 53844a1eaa5f..438a92338dfa 100644 --- a/libc/test/src/math/RIntTest.h +++ b/libc/test/src/math/RIntTest.h @@ -21,8 +21,8 @@ namespace mpfr = __llvm_libc::testing::mpfr; -static constexpr int roundingModes[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, - FE_TONEAREST}; +static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, + FE_TONEAREST}; template class RIntTestTemplate : public __llvm_libc::testing::Test { @@ -39,7 +39,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { const T neg_inf = T(FPBits::neg_inf()); const T nan = T(FPBits::build_nan(1)); - static inline mpfr::RoundingMode toMPFRRoundingMode(int mode) { + static inline mpfr::RoundingMode to_mpfr_rounding_mode(int mode) { switch (mode) { case FE_UPWARD: return mpfr::RoundingMode::Upward; @@ -56,7 +56,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { public: void testSpecialNumbers(RIntFunc func) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); ASSERT_FP_EQ(inf, func(inf)); ASSERT_FP_EQ(neg_inf, func(neg_inf)); @@ -67,50 +67,50 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { } void testRoundNumbers(RIntFunc func) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(T(1.0)), mpfr::Round(T(1.0), mpfrMode)); - ASSERT_FP_EQ(func(T(-1.0)), mpfr::Round(T(-1.0), mpfrMode)); - ASSERT_FP_EQ(func(T(10.0)), mpfr::Round(T(10.0), mpfrMode)); - ASSERT_FP_EQ(func(T(-10.0)), mpfr::Round(T(-10.0), mpfrMode)); - ASSERT_FP_EQ(func(T(1234.0)), mpfr::Round(T(1234.0), mpfrMode)); - ASSERT_FP_EQ(func(T(-1234.0)), 
mpfr::Round(T(-1234.0), mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(T(1.0)), mpfr::round(T(1.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-1.0)), mpfr::round(T(-1.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(10.0)), mpfr::round(T(10.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-10.0)), mpfr::round(T(-10.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(1234.0)), mpfr::round(T(1234.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-1234.0)), mpfr::round(T(-1234.0), mpfr_mode)); } } void testFractions(RIntFunc func) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(T(0.5)), mpfr::Round(T(0.5), mpfrMode)); - ASSERT_FP_EQ(func(T(-0.5)), mpfr::Round(T(-0.5), mpfrMode)); - ASSERT_FP_EQ(func(T(0.115)), mpfr::Round(T(0.115), mpfrMode)); - ASSERT_FP_EQ(func(T(-0.115)), mpfr::Round(T(-0.115), mpfrMode)); - ASSERT_FP_EQ(func(T(0.715)), mpfr::Round(T(0.715), mpfrMode)); - ASSERT_FP_EQ(func(T(-0.715)), mpfr::Round(T(-0.715), mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(T(0.5)), mpfr::round(T(0.5), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.5)), mpfr::round(T(-0.5), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.115)), mpfr::round(T(0.115), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.115)), mpfr::round(T(-0.115), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.715)), mpfr::round(T(0.715), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.715)), mpfr::round(T(-0.715), mpfr_mode)); } } void testSubnormalRange(RIntFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType i = FPBits::MIN_SUBNORMAL; i <= FPBits::MAX_SUBNORMAL; - i += step) { + i += STEP) { T x = T(FPBits(i)); - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(x), mpfr::round(x, mpfr_mode)); } } } void testNormalRange(RIntFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; - for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += step) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; + for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += STEP) { T x = T(FPBits(i)); // In normal range on x86 platforms, the long double implicit 1 bit can be // zero making the numbers NaN. We will skip them. 
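All of the range sweeps being renamed in these headers share one shape: derive a STEP from the width of the bit-pattern range, reinterpret each pattern as a floating point value, and skip anything non-finite, which on x86 also filters out the long double encodings with a cleared integer bit mentioned in the comment above. A self-contained version of that loop for double, using std::bit_cast in place of the FPBits helper, shown only as an illustration of the pattern:

#include <bit>
#include <cmath>
#include <cstdint>

// Visit COUNT + 1 evenly spaced bit patterns across the full uint64_t range,
// reinterpret each as a double, and skip NaNs and infinities; this mirrors the
// structure of the testRange/test_*_range helpers in these test headers.
void sweep_double_patterns() {
  constexpr uint64_t COUNT = 10'000'000;
  constexpr uint64_t STEP = uint64_t(-1) / COUNT;
  for (uint64_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
    const double x = std::bit_cast<double>(v);
    if (std::isnan(x) || std::isinf(x))
      continue;
    // ... exercise the function under test with x here ...
  }
}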
@@ -118,10 +118,10 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { continue; } - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(x), mpfr::round(x, mpfr_mode)); } } } diff --git a/libc/test/src/math/RemQuoTest.h b/libc/test/src/math/RemQuoTest.h index e96ddd759bd6..5903760c1969 100644 --- a/libc/test/src/math/RemQuoTest.h +++ b/libc/test/src/math/RemQuoTest.h @@ -95,12 +95,12 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test { } void testSubnormalRange(RemQuoFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType v = FPBits::MIN_SUBNORMAL, w = FPBits::MAX_SUBNORMAL; v <= FPBits::MAX_SUBNORMAL && w >= FPBits::MIN_SUBNORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); mpfr::BinaryOutput result; mpfr::BinaryInput input{x, y}; @@ -110,11 +110,11 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test { } void testNormalRange(RemQuoFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL; v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); mpfr::BinaryOutput result; mpfr::BinaryInput input{x, y}; diff --git a/libc/test/src/math/RoundTest.h b/libc/test/src/math/RoundTest.h index d4fea629507f..4720f385edab 100644 --- a/libc/test/src/math/RoundTest.h +++ b/libc/test/src/math/RoundTest.h @@ -64,9 +64,9 @@ template class RoundTest : public __llvm_libc::testing::Test { } void testRange(RoundFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/RoundToIntegerTest.h b/libc/test/src/math/RoundToIntegerTest.h index b77c4bffc828..60647eaadc6b 100644 --- a/libc/test/src/math/RoundToIntegerTest.h +++ b/libc/test/src/math/RoundToIntegerTest.h @@ -23,8 +23,8 @@ namespace mpfr = __llvm_libc::testing::mpfr; -static constexpr int roundingModes[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, - FE_TONEAREST}; +static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, + FE_TONEAREST}; template class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { @@ -40,11 +40,11 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { const F inf = F(__llvm_libc::fputil::FPBits::inf()); const F neg_inf = F(__llvm_libc::fputil::FPBits::neg_inf()); const F nan = F(__llvm_libc::fputil::FPBits::build_nan(1)); - static constexpr I IntegerMin = I(1) << (sizeof(I) * 8 - 1); - static constexpr I IntegerMax = -(IntegerMin + 1); + static constexpr I INTEGER_MIN = I(1) << 
(sizeof(I) * 8 - 1); + static constexpr I INTEGER_MAX = -(INTEGER_MIN + 1); - void testOneInput(RoundToIntegerFunc func, F input, I expected, - bool expectError) { + void test_one_input(RoundToIntegerFunc func, F input, I expected, + bool expectError) { #if math_errhandling & MATH_ERRNO errno = 0; #endif @@ -71,7 +71,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { } } - static inline mpfr::RoundingMode toMPFRRoundingMode(int mode) { + static inline mpfr::RoundingMode to_mpfr_rounding_mode(int mode) { switch (mode) { case FE_UPWARD: return mpfr::RoundingMode::Upward; @@ -96,32 +96,32 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { #endif } - void doInfinityAndNaNTest(RoundToIntegerFunc func) { - testOneInput(func, inf, IntegerMax, true); - testOneInput(func, neg_inf, IntegerMin, true); - testOneInput(func, nan, IntegerMax, true); + void do_infinity_and_na_n_test(RoundToIntegerFunc func) { + test_one_input(func, inf, INTEGER_MAX, true); + test_one_input(func, neg_inf, INTEGER_MIN, true); + test_one_input(func, nan, INTEGER_MAX, true); } void testInfinityAndNaN(RoundToIntegerFunc func) { if (TestModes) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - doInfinityAndNaNTest(func); + do_infinity_and_na_n_test(func); } } else { - doInfinityAndNaNTest(func); + do_infinity_and_na_n_test(func); } } - void doRoundNumbersTest(RoundToIntegerFunc func) { - testOneInput(func, zero, I(0), false); - testOneInput(func, neg_zero, I(0), false); - testOneInput(func, F(1.0), I(1), false); - testOneInput(func, F(-1.0), I(-1), false); - testOneInput(func, F(10.0), I(10), false); - testOneInput(func, F(-10.0), I(-10), false); - testOneInput(func, F(1234.0), I(1234), false); - testOneInput(func, F(-1234.0), I(-1234), false); + void do_round_numbers_test(RoundToIntegerFunc func) { + test_one_input(func, zero, I(0), false); + test_one_input(func, neg_zero, I(0), false); + test_one_input(func, F(1.0), I(1), false); + test_one_input(func, F(-1.0), I(-1), false); + test_one_input(func, F(10.0), I(10), false); + test_one_input(func, F(-10.0), I(-10), false); + test_one_input(func, F(1234.0), I(1234), false); + test_one_input(func, F(-1234.0), I(-1234), false); // The rest of this this function compares with an equivalent MPFR function // which rounds floating point numbers to long values. There is no MPFR @@ -131,58 +131,58 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (sizeof(I) > sizeof(long)) return; - constexpr int exponentLimit = sizeof(I) * 8 - 1; + constexpr int EXPONENT_LIMIT = sizeof(I) * 8 - 1; // We start with 1.0 so that the implicit bit for x86 long doubles // is set. 
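As context for the ROUNDING_MODES loops in these hunks: each test drives the floating-point environment through the four IEEE rounding modes before comparing against the MPFR reference. Below is a minimal standalone sketch of the same sweep using only the standard <cfenv>/<cmath> interface rather than the libc-internal fputil::set_round and MPFR wrappers, so the names and the use of nearbyint here are illustrative only, not the tests' API.
#include <cfenv>
#include <cmath>
#include <cstdio>
// Illustrative only: sweep the four standard rounding modes and observe how
// nearbyint() changes, mirroring how the tests call set_round() before each
// assertion against the MPFR reference.
static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD,
                                          FE_TOWARDZERO, FE_TONEAREST};
int main() {
  double x = 2.5;
  for (int mode : ROUNDING_MODES) {
    std::fesetround(mode);
    std::printf("mode %d -> nearbyint(%.1f) = %.1f\n", mode, x,
                std::nearbyint(x));
  }
  std::fesetround(FE_TONEAREST); // restore the default mode
  return 0;
}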
FPBits bits(F(1.0)); - bits.set_unbiased_exponent(exponentLimit + FPBits::EXPONENT_BIAS); + bits.set_unbiased_exponent(EXPONENT_LIMIT + FPBits::EXPONENT_BIAS); bits.set_sign(1); bits.set_mantissa(0); F x = F(bits); - long mpfrResult; - bool erangeflag = mpfr::RoundToLong(x, mpfrResult); + long mpfr_result; + bool erangeflag = mpfr::round_to_long(x, mpfr_result); ASSERT_FALSE(erangeflag); - testOneInput(func, x, mpfrResult, false); + test_one_input(func, x, mpfr_result, false); } void testRoundNumbers(RoundToIntegerFunc func) { if (TestModes) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - doRoundNumbersTest(func); + do_round_numbers_test(func); } } else { - doRoundNumbersTest(func); + do_round_numbers_test(func); } } - void doFractionsTest(RoundToIntegerFunc func, int mode) { - constexpr F fractions[] = {0.5, -0.5, 0.115, -0.115, 0.715, -0.715}; - for (F x : fractions) { - long mpfrLongResult; + void do_fractions_test(RoundToIntegerFunc func, int mode) { + constexpr F FRACTIONS[] = {0.5, -0.5, 0.115, -0.115, 0.715, -0.715}; + for (F x : FRACTIONS) { + long mpfr_long_result; bool erangeflag; if (TestModes) - erangeflag = - mpfr::RoundToLong(x, toMPFRRoundingMode(mode), mpfrLongResult); + erangeflag = mpfr::round_to_long(x, to_mpfr_rounding_mode(mode), + mpfr_long_result); else - erangeflag = mpfr::RoundToLong(x, mpfrLongResult); + erangeflag = mpfr::round_to_long(x, mpfr_long_result); ASSERT_FALSE(erangeflag); - I mpfrResult = mpfrLongResult; - testOneInput(func, x, mpfrResult, false); + I mpfr_result = mpfr_long_result; + test_one_input(func, x, mpfr_result, false); } } void testFractions(RoundToIntegerFunc func) { if (TestModes) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - doFractionsTest(func, mode); + do_fractions_test(func, mode); } } else { // Passing 0 for mode has no effect as it is not used in doFractionsTest // when `TestModes` is false; - doFractionsTest(func, 0); + do_fractions_test(func, 0); } } @@ -195,39 +195,39 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (sizeof(I) > sizeof(long)) return; - constexpr int exponentLimit = sizeof(I) * 8 - 1; + constexpr int EXPONENT_LIMIT = sizeof(I) * 8 - 1; // We start with 1.0 so that the implicit bit for x86 long doubles // is set. 
FPBits bits(F(1.0)); - bits.set_unbiased_exponent(exponentLimit + FPBits::EXPONENT_BIAS); + bits.set_unbiased_exponent(EXPONENT_LIMIT + FPBits::EXPONENT_BIAS); bits.set_sign(1); bits.set_mantissa(UIntType(0x1) << (__llvm_libc::fputil::MantissaWidth::VALUE - 1)); F x = F(bits); if (TestModes) { - for (int m : roundingModes) { + for (int m : ROUNDING_MODES) { __llvm_libc::fputil::set_round(m); - long mpfrLongResult; + long mpfr_long_result; bool erangeflag = - mpfr::RoundToLong(x, toMPFRRoundingMode(m), mpfrLongResult); + mpfr::round_to_long(x, to_mpfr_rounding_mode(m), mpfr_long_result); ASSERT_TRUE(erangeflag); - testOneInput(func, x, IntegerMin, true); + test_one_input(func, x, INTEGER_MIN, true); } } else { - long mpfrLongResult; - bool erangeflag = mpfr::RoundToLong(x, mpfrLongResult); + long mpfr_long_result; + bool erangeflag = mpfr::round_to_long(x, mpfr_long_result); ASSERT_TRUE(erangeflag); - testOneInput(func, x, IntegerMin, true); + test_one_input(func, x, INTEGER_MIN, true); } } void testSubnormalRange(RoundToIntegerFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType i = FPBits::MIN_SUBNORMAL; i <= FPBits::MAX_SUBNORMAL; - i += step) { + i += STEP) { F x = F(FPBits(i)); if (x == F(0.0)) continue; @@ -235,25 +235,25 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (TestModes) { if (x > 0) { __llvm_libc::fputil::set_round(FE_UPWARD); - testOneInput(func, x, I(1), false); + test_one_input(func, x, I(1), false); __llvm_libc::fputil::set_round(FE_DOWNWARD); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_TOWARDZERO); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_TONEAREST); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); } else { __llvm_libc::fputil::set_round(FE_UPWARD); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_DOWNWARD); - testOneInput(func, x, I(-1), false); + test_one_input(func, x, I(-1), false); __llvm_libc::fputil::set_round(FE_TOWARDZERO); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_TONEAREST); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); } } else { - testOneInput(func, x, 0L, false); + test_one_input(func, x, 0L, false); } } } @@ -267,9 +267,9 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (sizeof(I) > sizeof(long)) return; - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; - for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += step) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; + for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += STEP) { F x = F(FPBits(i)); // In normal range on x86 platforms, the long double implicit 1 bit can be // zero making the numbers NaN. We will skip them. 
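The INTEGER_MIN/INTEGER_MAX constants used in these hunks rely on two's-complement layout: shifting 1 into the sign bit gives the minimum value of the integer type, and -(min + 1) gives the maximum; the overflow tests then construct a float whose unbiased exponent equals the bit width minus one, i.e. a magnitude at or just past that boundary. A small compile-time check of the construction, instantiated for int64_t and assuming the two's-complement shift semantics (guaranteed since C++20) that the test itself relies on:
#include <cstdint>
#include <limits>
// Sanity check of the INTEGER_MIN/INTEGER_MAX construction, for int64_t.
// The shift into the sign bit is well-defined under C++20 semantics.
using I = std::int64_t;
constexpr I INTEGER_MIN = I(1) << (sizeof(I) * 8 - 1);
constexpr I INTEGER_MAX = -(INTEGER_MIN + 1);
static_assert(INTEGER_MIN == std::numeric_limits<I>::min(),
              "shift into the sign bit yields the minimum value");
static_assert(INTEGER_MAX == std::numeric_limits<I>::max(),
              "-(min + 1) yields the maximum value");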
@@ -278,25 +278,25 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { } if (TestModes) { - for (int m : roundingModes) { - long mpfrLongResult; - bool erangeflag = - mpfr::RoundToLong(x, toMPFRRoundingMode(m), mpfrLongResult); - I mpfrResult = mpfrLongResult; + for (int m : ROUNDING_MODES) { + long mpfr_long_result; + bool erangeflag = mpfr::round_to_long(x, to_mpfr_rounding_mode(m), + mpfr_long_result); + I mpfr_result = mpfr_long_result; __llvm_libc::fputil::set_round(m); if (erangeflag) - testOneInput(func, x, x > 0 ? IntegerMax : IntegerMin, true); + test_one_input(func, x, x > 0 ? INTEGER_MAX : INTEGER_MIN, true); else - testOneInput(func, x, mpfrResult, false); + test_one_input(func, x, mpfr_result, false); } } else { - long mpfrLongResult; - bool erangeflag = mpfr::RoundToLong(x, mpfrLongResult); - I mpfrResult = mpfrLongResult; + long mpfr_long_result; + bool erangeflag = mpfr::round_to_long(x, mpfr_long_result); + I mpfr_result = mpfr_long_result; if (erangeflag) - testOneInput(func, x, x > 0 ? IntegerMax : IntegerMin, true); + test_one_input(func, x, x > 0 ? INTEGER_MAX : INTEGER_MIN, true); else - testOneInput(func, x, mpfrResult, false); + test_one_input(func, x, mpfr_result, false); } } } diff --git a/libc/test/src/math/SqrtTest.h b/libc/test/src/math/SqrtTest.h index a6120a2c51eb..f3e4def723aa 100644 --- a/libc/test/src/math/SqrtTest.h +++ b/libc/test/src/math/SqrtTest.h @@ -18,7 +18,7 @@ template class SqrtTest : public __llvm_libc::testing::Test { DECLARE_SPECIAL_CONSTANTS(T) - static constexpr UIntType HiddenBit = + static constexpr UIntType HIDDEN_BIT = UIntType(1) << __llvm_libc::fputil::MantissaWidth::VALUE; public: @@ -37,7 +37,7 @@ template class SqrtTest : public __llvm_libc::testing::Test { } void testDenormalValues(SqrtFunc func) { - for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) { + for (UIntType mant = 1; mant < HIDDEN_BIT; mant <<= 1) { FPBits denormal(T(0.0)); denormal.set_mantissa(mant); @@ -45,18 +45,18 @@ template class SqrtTest : public __llvm_libc::testing::Test { T(0.5)); } - constexpr UIntType count = 1'000'001; - constexpr UIntType step = HiddenBit / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 1'000'001; + constexpr UIntType STEP = HIDDEN_BIT / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = *reinterpret_cast(&v); ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, func(x), 0.5); } } void testNormalRange(SqrtFunc func) { - constexpr UIntType count = 10'000'001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10'000'001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = *reinterpret_cast(&v); if (isnan(x) || (x < 0)) { continue; diff --git a/libc/test/src/math/TruncTest.h b/libc/test/src/math/TruncTest.h index 6e65f3d7888d..f939de40e24d 100644 --- a/libc/test/src/math/TruncTest.h +++ b/libc/test/src/math/TruncTest.h @@ -64,9 +64,9 @@ template class TruncTest : public __llvm_libc::testing::Test { } void testRange(TruncFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git 
a/libc/test/src/math/cos_test.cpp b/libc/test/src/math/cos_test.cpp index e99cb39ad1e8..0feef88ca17f 100644 --- a/libc/test/src/math/cos_test.cpp +++ b/libc/test/src/math/cos_test.cpp @@ -19,9 +19,9 @@ DECLARE_SPECIAL_CONSTANTS(double) TEST(LlvmLibccosTest, Range) { static constexpr double _2pi = 6.283185307179586; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { double x = double(FPBits(v)); // TODO: Expand the range of testing after range reduction is implemented. if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp index be968d6ea35c..8e5f6c0dcfe8 100644 --- a/libc/test/src/math/cosf_test.cpp +++ b/libc/test/src/math/cosf_test.cpp @@ -18,7 +18,7 @@ #include #include -using __llvm_libc::testing::sdcomp26094Values; +using __llvm_libc::testing::SDCOMP26094_VALUES; using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -47,9 +47,9 @@ TEST(LlvmLibcCosfTest, SpecialNumbers) { } TEST(LlvmLibcCosfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; @@ -73,7 +73,7 @@ TEST(LlvmLibcCosfTest, SmallValues) { // SDCOMP-26094: check cosf in the cases for which the range reducer // returns values furthest beyond its nominal upper bound of pi/4. TEST(LlvmLibcCosfTest, SDCOMP_26094) { - for (uint32_t v : sdcomp26094Values) { + for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits(v)); ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), 1.0); } diff --git a/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h b/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h new file mode 100644 index 000000000000..0fdfdf069aa9 --- /dev/null +++ b/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h @@ -0,0 +1,99 @@ +//===-- Common utility class for differential analysis --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/FPUtil/FPBits.h" +#include "utils/testutils/StreamWrapper.h" +#include "utils/testutils/Timer.h" + +namespace __llvm_libc { +namespace testing { + +template <typename T> class BinaryOpSingleOutputDiff { + using FPBits = fputil::FPBits<T>; + using UIntType = typename FPBits::UIntType; + static constexpr UIntType MSBIT = UIntType(1) << (8 * sizeof(UIntType) - 1); + static constexpr UIntType UINTMAX = (MSBIT - 1) + MSBIT; + +public: + typedef T Func(T, T); + + static void run_perf_in_range(Func myFunc, Func otherFunc, + UIntType startingBit, UIntType endingBit, + UIntType N, testutils::OutputFileStream &log) { + auto runner = [=](Func func) { + volatile T result; + if (endingBit < startingBit) { + return; + } + + UIntType step = (endingBit - startingBit) / N; + for (UIntType bitsX = startingBit, bitsY = endingBit;; + bitsX += step, bitsY -= step) { + T x = T(FPBits(bitsX)); + T y = T(FPBits(bitsY)); + result = func(x, y); + if (endingBit - bitsX < step) { + break; + } + } + }; + + Timer timer; + timer.start(); + runner(myFunc); + timer.stop(); + + double my_average = static_cast<double>(timer.nanoseconds()) / N; + log << "-- My function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << my_average << " ns/op \n"; + log << " Ops per second : " + << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n"; + + timer.start(); + runner(otherFunc); + timer.stop(); + + double other_average = static_cast<double>(timer.nanoseconds()) / N; + log << "-- Other function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << other_average << " ns/op \n"; + log << " Ops per second : " + << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n"; + + log << "-- Average runtime ratio --\n"; + log << " Mine / Other's : " << my_average / other_average << " \n"; + } + + static void run_perf(Func myFunc, Func otherFunc, const char *logFile) { + testutils::OutputFileStream log(logFile); + log << " Performance tests with inputs in denormal range:\n"; + run_perf_in_range(myFunc, otherFunc, /* startingBit= */ UIntType(0), + /* endingBit= */ FPBits::MAX_SUBNORMAL, 1'000'001, log); + log << "\n Performance tests with inputs in normal range:\n"; + run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL, + /* endingBit= */ FPBits::MAX_NORMAL, 100'000'001, log); + } +}; + +} // namespace testing +} // namespace __llvm_libc + +#define BINARY_OP_SINGLE_OUTPUT_DIFF(T, myFunc, otherFunc, filename) \ + int main() { \ + __llvm_libc::testing::BinaryOpSingleOutputDiff<T>::run_diff( \ + &myFunc, &otherFunc, filename); \ + return 0; \ + } + +#define BINARY_OP_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \ + int main() { \ + __llvm_libc::testing::BinaryOpSingleOutputDiff<T>::run_perf( \ + &myFunc, &otherFunc, filename); \ + return 0; \ + } diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt index e553bf5bfbef..d3e0e64e03fb 100644 --- a/libc/test/src/math/differential_testing/CMakeLists.txt +++ b/libc/test/src/math/differential_testing/CMakeLists.txt @@ -67,6 +67,12 @@ add_header_library( SingleInputSingleOutputDiff.h ) +add_header_library( + binary_op_single_output_diff + HDRS + BinaryOpSingleOutputDiff.h +) + add_diff_binary( sinf_diff SRCS @@ -368,3 +374,25 @@ add_diff_binary( COMPILE_OPTIONS 
-fno-builtin ) + +add_diff_binary( + hypotf_perf + SRCS + hypotf_perf.cpp + DEPENDS + .binary_op_single_output_diff + libc.src.math.hypotf + COMPILE_OPTIONS + -fno-builtin +) + +add_diff_binary( + hypot_perf + SRCS + hypot_perf.cpp + DEPENDS + .binary_op_single_output_diff + libc.src.math.hypot + COMPILE_OPTIONS + -fno-builtin +) diff --git a/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h b/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h index 9a516f9069ce..7cc7059db03a 100644 --- a/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h +++ b/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h @@ -46,28 +46,56 @@ template class SingleInputSingleOutputDiff { log << "Total number of differing results: " << diffCount << '\n'; } - static void runPerf(Func myFunc, Func otherFunc, const char *logFile) { - auto runner = [](Func func) { + static void runPerfInRange(Func myFunc, Func otherFunc, UIntType startingBit, + UIntType endingBit, + testutils::OutputFileStream &log) { + auto runner = [=](Func func) { volatile T result; - for (UIntType bits = 0;; ++bits) { + for (UIntType bits = startingBit;; ++bits) { T x = T(FPBits(bits)); result = func(x); - if (bits == UIntMax) + if (bits == endingBit) break; } }; - testutils::OutputFileStream log(logFile); Timer timer; timer.start(); runner(myFunc); timer.stop(); - log << " Run time of my function: " << timer.nanoseconds() << " ns \n"; + + UIntType numberOfRuns = endingBit - startingBit + 1; + double myAverage = static_cast(timer.nanoseconds()) / numberOfRuns; + log << "-- My function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << myAverage << " ns/op \n"; + log << " Ops per second : " + << static_cast(1'000'000'000.0 / myAverage) << " op/s \n"; timer.start(); runner(otherFunc); timer.stop(); - log << "Run time of other function: " << timer.nanoseconds() << " ns \n"; + + double otherAverage = + static_cast(timer.nanoseconds()) / numberOfRuns; + log << "-- Other function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << otherAverage << " ns/op \n"; + log << " Ops per second : " + << static_cast(1'000'000'000.0 / otherAverage) << " op/s \n"; + + log << "-- Average runtime ratio --\n"; + log << " Mine / Other's : " << myAverage / otherAverage << " \n"; + } + + static void runPerf(Func myFunc, Func otherFunc, const char *logFile) { + testutils::OutputFileStream log(logFile); + log << " Performance tests with inputs in denormal range:\n"; + runPerfInRange(myFunc, otherFunc, /* startingBit= */ UIntType(0), + /* endingBit= */ FPBits::MAX_SUBNORMAL, log); + log << "\n Performance tests with inputs in normal range:\n"; + runPerfInRange(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL, + /* endingBit= */ FPBits::MAX_NORMAL, log); } }; diff --git a/libc/test/src/math/differential_testing/hypot_perf.cpp b/libc/test/src/math/differential_testing/hypot_perf.cpp new file mode 100644 index 000000000000..f90605455f76 --- /dev/null +++ b/libc/test/src/math/differential_testing/hypot_perf.cpp @@ -0,0 +1,16 @@ +//===-- Differential test for hypot ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputDiff.h" + +#include "src/math/hypot.h" + +#include + +BINARY_OP_SINGLE_OUTPUT_PERF(double, __llvm_libc::hypot, ::hypot, + "hypot_perf.log") diff --git a/libc/test/src/math/differential_testing/hypotf_perf.cpp b/libc/test/src/math/differential_testing/hypotf_perf.cpp new file mode 100644 index 000000000000..75c61cb0592a --- /dev/null +++ b/libc/test/src/math/differential_testing/hypotf_perf.cpp @@ -0,0 +1,16 @@ +//===-- Differential test for hypotf --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputDiff.h" + +#include "src/math/hypotf.h" + +#include + +BINARY_OP_SINGLE_OUTPUT_PERF(float, __llvm_libc::hypotf, ::hypotf, + "hypotf_perf.log") diff --git a/libc/test/src/math/exp2f_test.cpp b/libc/test/src/math/exp2f_test.cpp index 7db1a576f554..f3a4cc413e1f 100644 --- a/libc/test/src/math/exp2f_test.cpp +++ b/libc/test/src/math/exp2f_test.cpp @@ -101,9 +101,9 @@ TEST(LlvmLibcExpfTest, Underflow) { } TEST(LlvmLibcexp2fTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp index 68d7dff462f5..d675fc1a9534 100644 --- a/libc/test/src/math/expf_test.cpp +++ b/libc/test/src/math/expf_test.cpp @@ -93,9 +93,9 @@ TEST(LlvmLibcExpfTest, Borderline) { } TEST(LlvmLibcExpfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp index cec3080962bf..b8439192151a 100644 --- a/libc/test/src/math/expm1f_test.cpp +++ b/libc/test/src/math/expm1f_test.cpp @@ -93,9 +93,9 @@ TEST(LlvmLibcExpm1fTest, Borderline) { } TEST(LlvmLibcExpm1fTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/fdim_test.cpp b/libc/test/src/math/fdim_test.cpp index a4726f28a382..06b61c2124d2 100644 --- a/libc/test/src/math/fdim_test.cpp +++ b/libc/test/src/math/fdim_test.cpp @@ -16,14 +16,16 @@ using LlvmLibcFDimTest = FDimTestTemplate; -TEST_F(LlvmLibcFDimTest, NaNArg_fdim) { testNaNArg(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, NaNArg_fdim) { test_na_n_arg(&__llvm_libc::fdim); } 
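The new BINARY_OP_SINGLE_OUTPUT_PERF binaries added above (hypot_perf, hypotf_perf) boil down to timing two implementations over the same input sweep and reporting an average cost per call plus the ratio between them. A rough standalone sketch of that measurement using std::chrono; the real harness uses the internal Timer and OutputFileStream and walks raw FPBits patterns, so the hypot-versus-naive comparison here is only a stand-in.
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
// Evaluate an implementation over N inputs and return the average ns per call.
template <typename Func>
static double average_ns(Func f, std::uint64_t n) {
  volatile double sink = 0.0;
  auto start = std::chrono::steady_clock::now();
  for (std::uint64_t i = 0; i < n; ++i)
    sink = f(1.0 + 1e-9 * double(i), 2.0 - 1e-9 * double(i));
  auto stop = std::chrono::steady_clock::now();
  (void)sink;
  return std::chrono::duration<double, std::nano>(stop - start).count() /
         double(n);
}
int main() {
  constexpr std::uint64_t N = 1'000'001;
  double mine = average_ns([](double x, double y) { return std::hypot(x, y); }, N);
  double other =
      average_ns([](double x, double y) { return std::sqrt(x * x + y * y); }, N);
  std::printf("mine: %.2f ns/op, other: %.2f ns/op, ratio: %.2f\n", mine,
              other, mine / other);
  return 0;
}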
-TEST_F(LlvmLibcFDimTest, InfArg_fdim) { testInfArg(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, InfArg_fdim) { test_inf_arg(&__llvm_libc::fdim); } -TEST_F(LlvmLibcFDimTest, NegInfArg_fdim) { testNegInfArg(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, NegInfArg_fdim) { + test_neg_inf_arg(&__llvm_libc::fdim); +} -TEST_F(LlvmLibcFDimTest, BothZero_fdim) { testBothZero(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, BothZero_fdim) { test_both_zero(&__llvm_libc::fdim); } TEST_F(LlvmLibcFDimTest, InDoubleRange_fdim) { - testInRange(&__llvm_libc::fdim); + test_in_range(&__llvm_libc::fdim); } diff --git a/libc/test/src/math/fdimf_test.cpp b/libc/test/src/math/fdimf_test.cpp index 45f960cc8955..c85b755738f3 100644 --- a/libc/test/src/math/fdimf_test.cpp +++ b/libc/test/src/math/fdimf_test.cpp @@ -16,16 +16,18 @@ using LlvmLibcFDimTest = FDimTestTemplate; -TEST_F(LlvmLibcFDimTest, NaNArg_fdimf) { testNaNArg(&__llvm_libc::fdimf); } +TEST_F(LlvmLibcFDimTest, NaNArg_fdimf) { test_na_n_arg(&__llvm_libc::fdimf); } -TEST_F(LlvmLibcFDimTest, InfArg_fdimf) { testInfArg(&__llvm_libc::fdimf); } +TEST_F(LlvmLibcFDimTest, InfArg_fdimf) { test_inf_arg(&__llvm_libc::fdimf); } TEST_F(LlvmLibcFDimTest, NegInfArg_fdimf) { - testNegInfArg(&__llvm_libc::fdimf); + test_neg_inf_arg(&__llvm_libc::fdimf); } -TEST_F(LlvmLibcFDimTest, BothZero_fdimf) { testBothZero(&__llvm_libc::fdimf); } +TEST_F(LlvmLibcFDimTest, BothZero_fdimf) { + test_both_zero(&__llvm_libc::fdimf); +} TEST_F(LlvmLibcFDimTest, InFloatRange_fdimf) { - testInRange(&__llvm_libc::fdimf); + test_in_range(&__llvm_libc::fdimf); } diff --git a/libc/test/src/math/fdiml_test.cpp b/libc/test/src/math/fdiml_test.cpp index b82819395b35..79427d6a33d2 100644 --- a/libc/test/src/math/fdiml_test.cpp +++ b/libc/test/src/math/fdiml_test.cpp @@ -16,16 +16,18 @@ using LlvmLibcFDimTest = FDimTestTemplate; -TEST_F(LlvmLibcFDimTest, NaNArg_fdiml) { testNaNArg(&__llvm_libc::fdiml); } +TEST_F(LlvmLibcFDimTest, NaNArg_fdiml) { test_na_n_arg(&__llvm_libc::fdiml); } -TEST_F(LlvmLibcFDimTest, InfArg_fdiml) { testInfArg(&__llvm_libc::fdiml); } +TEST_F(LlvmLibcFDimTest, InfArg_fdiml) { test_inf_arg(&__llvm_libc::fdiml); } TEST_F(LlvmLibcFDimTest, NegInfArg_fdiml) { - testNegInfArg(&__llvm_libc::fdiml); + test_neg_inf_arg(&__llvm_libc::fdiml); } -TEST_F(LlvmLibcFDimTest, BothZero_fdiml) { testBothZero(&__llvm_libc::fdiml); } +TEST_F(LlvmLibcFDimTest, BothZero_fdiml) { + test_both_zero(&__llvm_libc::fdiml); +} TEST_F(LlvmLibcFDimTest, InLongDoubleRange_fdiml) { - testInRange(&__llvm_libc::fdiml); + test_in_range(&__llvm_libc::fdiml); } diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp index 520772797edf..a9388cca72f7 100644 --- a/libc/test/src/math/fma_test.cpp +++ b/libc/test/src/math/fma_test.cpp @@ -13,11 +13,11 @@ using LlvmLibcFmaTest = FmaTestTemplate; TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::fma); + test_special_numbers(&__llvm_libc::fma); } TEST_F(LlvmLibcFmaTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::fma); + test_subnormal_range(&__llvm_libc::fma); } -TEST_F(LlvmLibcFmaTest, NormalRange) { testNormalRange(&__llvm_libc::fma); } +TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fma); } diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp index f58698c95851..73d009499deb 100644 --- a/libc/test/src/math/fmaf_test.cpp +++ b/libc/test/src/math/fmaf_test.cpp @@ -13,11 +13,11 @@ using LlvmLibcFmaTest = FmaTestTemplate; TEST_F(LlvmLibcFmaTest, 
SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::fmaf); + test_special_numbers(&__llvm_libc::fmaf); } TEST_F(LlvmLibcFmaTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::fmaf); + test_subnormal_range(&__llvm_libc::fmaf); } -TEST_F(LlvmLibcFmaTest, NormalRange) { testNormalRange(&__llvm_libc::fmaf); } +TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); } diff --git a/libc/test/src/math/hypot_test.cpp b/libc/test/src/math/hypot_test.cpp index 928c38a494f6..2d7b640adf10 100644 --- a/libc/test/src/math/hypot_test.cpp +++ b/libc/test/src/math/hypot_test.cpp @@ -13,11 +13,13 @@ using LlvmLibcHypotTest = HypotTestTemplate; TEST_F(LlvmLibcHypotTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::hypot); + test_special_numbers(&__llvm_libc::hypot); } TEST_F(LlvmLibcHypotTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::hypot); + test_subnormal_range(&__llvm_libc::hypot); } -TEST_F(LlvmLibcHypotTest, NormalRange) { testNormalRange(&__llvm_libc::hypot); } +TEST_F(LlvmLibcHypotTest, NormalRange) { + test_normal_range(&__llvm_libc::hypot); +} diff --git a/libc/test/src/math/hypotf_test.cpp b/libc/test/src/math/hypotf_test.cpp index 94f102efb73f..2e958d63d84a 100644 --- a/libc/test/src/math/hypotf_test.cpp +++ b/libc/test/src/math/hypotf_test.cpp @@ -13,13 +13,13 @@ using LlvmLibcHypotfTest = HypotTestTemplate; TEST_F(LlvmLibcHypotfTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::hypotf); + test_special_numbers(&__llvm_libc::hypotf); } TEST_F(LlvmLibcHypotfTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::hypotf); + test_subnormal_range(&__llvm_libc::hypotf); } TEST_F(LlvmLibcHypotfTest, NormalRange) { - testNormalRange(&__llvm_libc::hypotf); + test_normal_range(&__llvm_libc::hypotf); } diff --git a/libc/test/src/math/ilogb_test.cpp b/libc/test/src/math/ilogb_test.cpp index 8b8b51cb41be..b049247815d2 100644 --- a/libc/test/src/math/ilogb_test.cpp +++ b/libc/test/src/math/ilogb_test.cpp @@ -16,21 +16,21 @@ #include TEST_F(LlvmLibcILogbTest, SpecialNumbers_ilogb) { - testSpecialNumbers(&__llvm_libc::ilogb); + test_special_numbers(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, PowersOfTwo_ilogb) { - testPowersOfTwo(&__llvm_libc::ilogb); + test_powers_of_two(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, SomeIntegers_ilogb) { - testSomeIntegers(&__llvm_libc::ilogb); + test_some_integers(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, SubnormalRange_ilogb) { - testSubnormalRange(&__llvm_libc::ilogb); + test_subnormal_range(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, NormalRange_ilogb) { - testNormalRange(&__llvm_libc::ilogb); + test_normal_range(&__llvm_libc::ilogb); } diff --git a/libc/test/src/math/ilogbf_test.cpp b/libc/test/src/math/ilogbf_test.cpp index 9fabc01ea969..4b330a8fbbd2 100644 --- a/libc/test/src/math/ilogbf_test.cpp +++ b/libc/test/src/math/ilogbf_test.cpp @@ -16,21 +16,21 @@ #include TEST_F(LlvmLibcILogbTest, SpecialNumbers_ilogbf) { - testSpecialNumbers(&__llvm_libc::ilogbf); + test_special_numbers(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, PowersOfTwo_ilogbf) { - testPowersOfTwo(&__llvm_libc::ilogbf); + test_powers_of_two(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, SomeIntegers_ilogbf) { - testSomeIntegers(&__llvm_libc::ilogbf); + test_some_integers(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, SubnormalRange_ilogbf) { - testSubnormalRange(&__llvm_libc::ilogbf); + test_subnormal_range(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, NormalRange_ilogbf) { - 
testNormalRange(&__llvm_libc::ilogbf); + test_normal_range(&__llvm_libc::ilogbf); } diff --git a/libc/test/src/math/ilogbl_test.cpp b/libc/test/src/math/ilogbl_test.cpp index f36746b6f64f..5979c96517ab 100644 --- a/libc/test/src/math/ilogbl_test.cpp +++ b/libc/test/src/math/ilogbl_test.cpp @@ -18,21 +18,21 @@ using RunContext = __llvm_libc::testing::RunContext; TEST_F(LlvmLibcILogbTest, SpecialNumbers_ilogbl) { - testSpecialNumbers(&__llvm_libc::ilogbl); + test_special_numbers(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, PowersOfTwo_ilogbl) { - testPowersOfTwo(&__llvm_libc::ilogbl); + test_powers_of_two(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, SomeIntegers_ilogbl) { - testSomeIntegers(&__llvm_libc::ilogbl); + test_some_integers(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, SubnormalRange_ilogbl) { - testSubnormalRange(&__llvm_libc::ilogbl); + test_subnormal_range(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, NormalRange_ilogbl) { - testNormalRange(&__llvm_libc::ilogbl); + test_normal_range(&__llvm_libc::ilogbl); } diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp index a3ada693a3cb..eb4da810486d 100644 --- a/libc/test/src/math/logf_test.cpp +++ b/libc/test/src/math/logf_test.cpp @@ -45,9 +45,9 @@ TEST(LlvmLibcLogfTest, TrickyInputs) { } TEST(LlvmLibcLogfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/sdcomp26094.h b/libc/test/src/math/sdcomp26094.h index c42c12c237af..316288441f94 100644 --- a/libc/test/src/math/sdcomp26094.h +++ b/libc/test/src/math/sdcomp26094.h @@ -16,7 +16,7 @@ namespace __llvm_libc { namespace testing { -static constexpr __llvm_libc::cpp::Array sdcomp26094Values{ +static constexpr __llvm_libc::cpp::Array SDCOMP26094_VALUES{ 0x46427f1b, 0x4647e568, 0x46428bac, 0x4647f1f9, 0x4647fe8a, 0x45d8d7f1, 0x45d371a4, 0x45ce0b57, 0x45d35882, 0x45cdf235, }; diff --git a/libc/test/src/math/sin_test.cpp b/libc/test/src/math/sin_test.cpp index bce7a72a8438..e44907488914 100644 --- a/libc/test/src/math/sin_test.cpp +++ b/libc/test/src/math/sin_test.cpp @@ -20,9 +20,9 @@ DECLARE_SPECIAL_CONSTANTS(double) TEST(LlvmLibcSinTest, Range) { static constexpr double _2pi = 6.283185307179586; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { double x = double(FPBits(v)); // TODO: Expand the range of testing after range reduction is implemented. 
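The InFloatRange/Range tests in these hunks all share one sampling trick: stepping a raw bit pattern by UINT32_MAX / COUNT (or UIntType(-1) / COUNT for double) visits values spread evenly over the entire encoding, covering subnormals, both signs, and the full range of magnitudes. A self-contained sketch of that sweep, using memcpy in place of the internal FPBits constructor:
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>
// Step a raw 32-bit pattern across the whole float encoding, skipping NaN and
// infinity exactly as the tests do before comparing against MPFR.
int main() {
  constexpr std::uint32_t COUNT = 100000;
  constexpr std::uint32_t STEP = UINT32_MAX / COUNT;
  std::uint32_t tested = 0;
  for (std::uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
    float x;
    std::memcpy(&x, &v, sizeof(x)); // reinterpret the bit pattern as a float
    if (std::isnan(x) || std::isinf(x))
      continue; // same skip as in the tests
    ++tested;   // here the tests would assert against the MPFR reference
  }
  std::printf("%u bit patterns sampled\n", static_cast<unsigned>(tested));
  return 0;
}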
if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp index 886fc1e7dcac..2b4294d29c05 100644 --- a/libc/test/src/math/sincosf_test.cpp +++ b/libc/test/src/math/sincosf_test.cpp @@ -18,7 +18,7 @@ #include #include -using __llvm_libc::testing::sdcomp26094Values; +using __llvm_libc::testing::SDCOMP26094_VALUES; using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -58,9 +58,9 @@ TEST(LlvmLibcSinCosfTest, SpecialNumbers) { } TEST(LlvmLibcSinCosfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits((v))); if (isnan(x) || isinf(x)) continue; @@ -95,7 +95,7 @@ TEST(LlvmLibcSinCosfTest, SmallValues) { // SDCOMP-26094: check sinf in the cases for which the range reducer // returns values furthest beyond its nominal upper bound of pi/4. TEST(LlvmLibcSinCosfTest, SDCOMP_26094) { - for (uint32_t v : sdcomp26094Values) { + for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits((v))); float sin, cos; __llvm_libc::sincosf(x, &sin, &cos); diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp index 64d932ee22f1..8fa43da08a44 100644 --- a/libc/test/src/math/sinf_test.cpp +++ b/libc/test/src/math/sinf_test.cpp @@ -18,7 +18,7 @@ #include #include -using __llvm_libc::testing::sdcomp26094Values; +using __llvm_libc::testing::SDCOMP26094_VALUES; using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -47,9 +47,9 @@ TEST(LlvmLibcSinfTest, SpecialNumbers) { } TEST(LlvmLibcSinfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; @@ -78,7 +78,7 @@ TEST(LlvmLibcSinfTest, SmallValues) { // SDCOMP-26094: check sinf in the cases for which the range reducer // returns values furthest beyond its nominal upper bound of pi/4. TEST(LlvmLibcSinfTest, SDCOMP_26094) { - for (uint32_t v : sdcomp26094Values) { + for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits((v))); EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0); } diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp index de3d6f16b76e..750463148bc0 100644 --- a/libc/test/src/math/tan_test.cpp +++ b/libc/test/src/math/tan_test.cpp @@ -19,9 +19,9 @@ DECLARE_SPECIAL_CONSTANTS(double) TEST(LlvmLibctanTest, Range) { static constexpr double _2pi = 6.283185307179586; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { double x = double(FPBits(v)); // TODO: Expand the range of testing after range reduction is implemented. 
if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) diff --git a/libc/test/src/stdlib/atof_test.cpp b/libc/test/src/stdlib/atof_test.cpp index a5379b2ee666..620308c37d60 100644 --- a/libc/test/src/stdlib/atof_test.cpp +++ b/libc/test/src/stdlib/atof_test.cpp @@ -18,35 +18,35 @@ // This is just a simple test to make sure that this function works at all. It's // functionally identical to strtod so the bulk of the testing is there. TEST(LlvmLibcAToFTest, SimpleTest) { - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(uint64_t(0x405ec00000000000)); errno = 0; double result = __llvm_libc::atof("123"); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, 0); } TEST(LlvmLibcAToFTest, FailedParsingTest) { - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(uint64_t(0)); errno = 0; double result = __llvm_libc::atof("???"); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, 0); } diff --git a/libc/test/src/stdlib/bsearch_test.cpp b/libc/test/src/stdlib/bsearch_test.cpp index 6076e47f9ab9..b8bd319f051c 100644 --- a/libc/test/src/stdlib/bsearch_test.cpp +++ b/libc/test/src/stdlib/bsearch_test.cpp @@ -35,44 +35,44 @@ TEST(LlvmLibcBsearchTest, ErrorInputs) { } TEST(LlvmLibcBsearchTest, IntegerArray) { - constexpr int array[25] = {10, 23, 33, 35, 55, 70, 71, + constexpr int ARRAY[25] = {10, 23, 33, 35, 55, 70, 71, 100, 110, 123, 133, 135, 155, 170, 171, 1100, 1110, 1123, 1133, 1135, 1155, 1170, 1171, 11100, 12310}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(ARRAY) / sizeof(int); - for (size_t s = 1; s <= array_size; ++s) { + for (size_t s = 1; s <= ARRAY_SIZE; ++s) { for (size_t i = 0; i < s; ++i) { - int key = array[i]; + int key = ARRAY[i]; void *elem = - __llvm_libc::bsearch(&key, array, s, sizeof(int), int_compare); + __llvm_libc::bsearch(&key, ARRAY, s, sizeof(int), int_compare); ASSERT_EQ(*reinterpret_cast(elem), key); } } // Non existent keys - for (size_t s = 1; s <= array_size; ++s) { + for (size_t s = 1; s <= ARRAY_SIZE; ++s) { int key = 5; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); key = 125; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, 
sizeof(int), int_compare) == nullptr); key = 136; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); key = 12345; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); } } TEST(LlvmLibcBsearchTest, SameKeyAndArray) { - constexpr int array[5] = {1, 2, 3, 4, 5}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr int ARRAY[5] = {1, 2, 3, 4, 5}; + constexpr size_t ARRAY_SIZE = sizeof(ARRAY) / sizeof(int); void *elem = - __llvm_libc::bsearch(array, array, array_size, sizeof(int), int_compare); - EXPECT_EQ(*reinterpret_cast(elem), array[0]); + __llvm_libc::bsearch(ARRAY, ARRAY, ARRAY_SIZE, sizeof(int), int_compare); + EXPECT_EQ(*reinterpret_cast(elem), ARRAY[0]); } diff --git a/libc/test/src/stdlib/qsort_test.cpp b/libc/test/src/stdlib/qsort_test.cpp index 8de8beee598d..99ccc3fa6046 100644 --- a/libc/test/src/stdlib/qsort_test.cpp +++ b/libc/test/src/stdlib/qsort_test.cpp @@ -27,9 +27,9 @@ TEST(LlvmLibcQsortTest, SortedArray) { int array[25] = {10, 23, 33, 35, 55, 70, 71, 100, 110, 123, 133, 135, 155, 170, 171, 1100, 1110, 1123, 1133, 1135, 1155, 1170, 1171, 11100, 12310}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 10); ASSERT_LE(array[1], 23); @@ -61,11 +61,11 @@ TEST(LlvmLibcQsortTest, SortedArray) { TEST(LlvmLibcQsortTest, ReverseSortedArray) { int array[25] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); - for (int i = 0; i < int(array_size - 1); ++i) + for (int i = 0; i < int(ARRAY_SIZE - 1); ++i) ASSERT_LE(array[i], i + 1); } @@ -73,20 +73,20 @@ TEST(LlvmLibcQsortTest, AllEqualElements) { int array[25] = {100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); - for (size_t i = 0; i < array_size - 1; ++i) + for (size_t i = 0; i < ARRAY_SIZE - 1; ++i) ASSERT_LE(array[i], 100); } TEST(LlvmLibcQsortTest, UnsortedArray1) { int array[25] = {10, 23, 8, 35, 55, 45, 40, 100, 110, 123, 90, 80, 70, 60, 171, 11, 1, -1, -5, -10, 1155, 1170, 1171, 12, -100}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], -100); ASSERT_LE(array[1], -10); @@ -117,9 +117,9 @@ TEST(LlvmLibcQsortTest, UnsortedArray1) { TEST(LlvmLibcQsortTest, UnsortedArray2) { int array[7] = {10, 40, 45, 55, 35, 23, 60}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - 
__llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 10); ASSERT_LE(array[1], 23); @@ -132,9 +132,9 @@ TEST(LlvmLibcQsortTest, UnsortedArray2) { TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements1) { int array[6] = {10, 10, 20, 20, 5, 5}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 5); ASSERT_LE(array[1], 5); @@ -146,9 +146,9 @@ TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements1) { TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements2) { int array[10] = {20, 10, 10, 10, 10, 20, 21, 21, 21, 21}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 10); ASSERT_LE(array[1], 10); @@ -164,9 +164,9 @@ TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements2) { TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements3) { int array[10] = {20, 30, 30, 30, 30, 20, 21, 21, 21, 21}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 20); ASSERT_LE(array[1], 20); @@ -182,9 +182,9 @@ TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements3) { TEST(LlvmLibcQsortTest, UnsortedThreeElementArray1) { int array[3] = {14999024, 0, 3}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 3); @@ -193,9 +193,9 @@ TEST(LlvmLibcQsortTest, UnsortedThreeElementArray1) { TEST(LlvmLibcQsortTest, UnsortedThreeElementArray2) { int array[3] = {3, 14999024, 0}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 3); @@ -204,9 +204,9 @@ TEST(LlvmLibcQsortTest, UnsortedThreeElementArray2) { TEST(LlvmLibcQsortTest, UnsortedThreeElementArray3) { int array[3] = {3, 0, 14999024}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 3); @@ -215,9 +215,9 @@ TEST(LlvmLibcQsortTest, UnsortedThreeElementArray3) { TEST(LlvmLibcQsortTest, SameElementThreeElementArray) { int array[3] = {12345, 12345, 12345}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 12345); ASSERT_LE(array[1], 12345); @@ -226,9 +226,9 @@ 
TEST(LlvmLibcQsortTest, SameElementThreeElementArray) { TEST(LlvmLibcQsortTest, UnsortedTwoElementArray1) { int array[2] = {14999024, 0}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 14999024); @@ -236,9 +236,9 @@ TEST(LlvmLibcQsortTest, UnsortedTwoElementArray1) { TEST(LlvmLibcQsortTest, UnsortedTwoElementArray2) { int array[2] = {0, 14999024}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 14999024); @@ -246,20 +246,20 @@ TEST(LlvmLibcQsortTest, UnsortedTwoElementArray2) { TEST(LlvmLibcQsortTest, SameElementTwoElementArray) { int array[2] = {12345, 12345}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 12345); ASSERT_LE(array[1], 12345); } TEST(LlvmLibcQSortTest, SingleElementArray) { - constexpr int elem = 12345; - int array[1] = {elem}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr int ELEM = 12345; + int array[1] = {ELEM}; + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); - ASSERT_LE(array[0], elem); + ASSERT_LE(array[0], ELEM); } diff --git a/libc/test/src/stdlib/strtod_test.cpp b/libc/test/src/stdlib/strtod_test.cpp index 154cf1a33e7e..eee11231c044 100644 --- a/libc/test/src/stdlib/strtod_test.cpp +++ b/libc/test/src/stdlib/strtod_test.cpp @@ -17,8 +17,8 @@ class LlvmLibcStrToDTest : public __llvm_libc::testing::Test { public: - void runTest(const char *inputString, const ptrdiff_t expectedStrLen, - const uint64_t expectedRawData, const int expectedErrno = 0) { + void run_test(const char *inputString, const ptrdiff_t expectedStrLen, + const uint64_t expectedRawData, const int expectedErrno = 0) { // expectedRawData is the expected double result as a uint64_t, organized // according to IEEE754: // @@ -33,58 +33,59 @@ class LlvmLibcStrToDTest : public __llvm_libc::testing::Test { // +-- 11 Exponent Bits // // This is so that the result can be compared in parts. 
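The run_test helper's comment explains that the expected value is supplied as raw IEEE 754 bits so that a failure can be attributed to a specific field. A minimal illustration of that "compare in parts" decomposition for the double used in the atof test above (0x405ec00000000000, i.e. 123.0), done here with plain shifts and masks rather than FPBits:
#include <cstdint>
#include <cstdio>
// Split a raw 64-bit pattern into the IEEE 754 fields: 1 sign bit,
// 11 exponent bits, 52 mantissa bits.
int main() {
  std::uint64_t raw = 0x405ec00000000000; // expected bits for strtod("123")
  std::uint64_t sign = raw >> 63;
  std::uint64_t exponent = (raw >> 52) & 0x7FF;
  std::uint64_t mantissa = raw & ((std::uint64_t(1) << 52) - 1);
  std::printf("sign=%llu exponent=0x%llx mantissa=0x%llx\n",
              (unsigned long long)sign, (unsigned long long)exponent,
              (unsigned long long)mantissa);
  return 0;
}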
- char *strEnd = nullptr; + char *str_end = nullptr; - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(expectedRawData); errno = 0; - double result = __llvm_libc::strtod(inputString, &strEnd); + double result = __llvm_libc::strtod(inputString, &str_end); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(strEnd - inputString, expectedStrLen); + EXPECT_EQ(str_end - inputString, expectedStrLen); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, expectedErrno); } }; TEST_F(LlvmLibcStrToDTest, SimpleTest) { - runTest("123", 3, uint64_t(0x405ec00000000000)); + run_test("123", 3, uint64_t(0x405ec00000000000)); // This should fail on Eisel-Lemire, forcing a fallback to simple decimal // conversion. - runTest("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8)); + run_test("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8)); // Found while looking for difficult test cases here: // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt - runTest("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb)); + run_test("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb)); - runTest("0x123", 5, uint64_t(0x4072300000000000)); + run_test("0x123", 5, uint64_t(0x4072300000000000)); } // These are tests that have caused problems in the past. TEST_F(LlvmLibcStrToDTest, SpecificFailures) { - runTest("3E70000000000000", 16, uint64_t(0x7FF0000000000000), ERANGE); - runTest("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9)); - runTest("2.16656806400000023841857910156251e9", 36, - uint64_t(0x41e0246690000001)); - runTest("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9)); + run_test("3E70000000000000", 16, uint64_t(0x7FF0000000000000), ERANGE); + run_test("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9)); + run_test("2.16656806400000023841857910156251e9", 36, + uint64_t(0x41e0246690000001)); + run_test("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9)); } TEST_F(LlvmLibcStrToDTest, FuzzFailures) { - runTest("-\xff\xff\xff\xff\xff\xff\xff\x01", 0, uint64_t(0)); - runTest("-.????", 0, uint64_t(0)); - runTest("44444444444444444444444444444444444444444444444444A44444444444444444" - "44444444444*\x99\xff\xff\xff\xff", - 50, uint64_t(0x4a3e68fdd0e0b2d8)); - runTest("-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKNNNNNNNNNNNNNNNNNN?" - "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN?", - 0, uint64_t(0)); - runTest("0x.666E40", 9, uint64_t(0x3fd99b9000000000)); + run_test("-\xff\xff\xff\xff\xff\xff\xff\x01", 0, uint64_t(0)); + run_test("-.????", 0, uint64_t(0)); + run_test( + "44444444444444444444444444444444444444444444444444A44444444444444444" + "44444444444*\x99\xff\xff\xff\xff", + 50, uint64_t(0x4a3e68fdd0e0b2d8)); + run_test("-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKNNNNNNNNNNNNNNNNNN?" 
+ "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN?", + 0, uint64_t(0)); + run_test("0x.666E40", 9, uint64_t(0x3fd99b9000000000)); } diff --git a/libc/test/src/stdlib/strtof_test.cpp b/libc/test/src/stdlib/strtof_test.cpp index db6c194904db..e47541da47cc 100644 --- a/libc/test/src/stdlib/strtof_test.cpp +++ b/libc/test/src/stdlib/strtof_test.cpp @@ -17,8 +17,8 @@ class LlvmLibcStrToFTest : public __llvm_libc::testing::Test { public: - void runTest(const char *inputString, const ptrdiff_t expectedStrLen, - const uint32_t expectedRawData, const int expectedErrno = 0) { + void run_test(const char *inputString, const ptrdiff_t expectedStrLen, + const uint32_t expectedRawData, const int expectedErrno = 0) { // expectedRawData is the expected float result as a uint32_t, organized // according to IEEE754: // @@ -33,23 +33,23 @@ class LlvmLibcStrToFTest : public __llvm_libc::testing::Test { // +-- 8 Exponent Bits // // This is so that the result can be compared in parts. - char *strEnd = nullptr; + char *str_end = nullptr; - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(expectedRawData); errno = 0; - float result = __llvm_libc::strtof(inputString, &strEnd); + float result = __llvm_libc::strtof(inputString, &str_end); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(strEnd - inputString, expectedStrLen); + EXPECT_EQ(str_end - inputString, expectedStrLen); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, expectedErrno); } }; @@ -59,115 +59,117 @@ class LlvmLibcStrToFTest : public __llvm_libc::testing::Test { // them. 
TEST_F(LlvmLibcStrToFTest, BasicDecimalTests) { - runTest("1", 1, 0x3f800000); - runTest("123", 3, 0x42f60000); - runTest("1234567890", 10, 0x4e932c06u); - runTest("123456789012345678901", 21, 0x60d629d4); - runTest("0.1", 3, 0x3dcccccdu); - runTest(".1", 2, 0x3dcccccdu); - runTest("-0.123456789", 12, 0xbdfcd6eau); - runTest("0.11111111111111111111", 22, 0x3de38e39u); - runTest("0.0000000000000000000000001", 27, 0x15f79688u); + run_test("1", 1, 0x3f800000); + run_test("123", 3, 0x42f60000); + run_test("1234567890", 10, 0x4e932c06u); + run_test("123456789012345678901", 21, 0x60d629d4); + run_test("0.1", 3, 0x3dcccccdu); + run_test(".1", 2, 0x3dcccccdu); + run_test("-0.123456789", 12, 0xbdfcd6eau); + run_test("0.11111111111111111111", 22, 0x3de38e39u); + run_test("0.0000000000000000000000001", 27, 0x15f79688u); } TEST_F(LlvmLibcStrToFTest, DecimalOutOfRangeTests) { - runTest("555E36", 6, 0x7f800000, ERANGE); - runTest("1e-10000", 8, 0x0, ERANGE); + run_test("555E36", 6, 0x7f800000, ERANGE); + run_test("1e-10000", 8, 0x0, ERANGE); } TEST_F(LlvmLibcStrToFTest, DecimalsWithRoundingProblems) { - runTest("20040229", 8, 0x4b98e512); - runTest("20040401", 8, 0x4b98e568); - runTest("9E9", 3, 0x50061c46); + run_test("20040229", 8, 0x4b98e512); + run_test("20040401", 8, 0x4b98e568); + run_test("9E9", 3, 0x50061c46); } TEST_F(LlvmLibcStrToFTest, DecimalSubnormals) { - runTest("1.4012984643248170709237295832899161312802619418765e-45", 55, 0x1, - ERANGE); + run_test("1.4012984643248170709237295832899161312802619418765e-45", 55, 0x1, + ERANGE); } TEST_F(LlvmLibcStrToFTest, DecimalWithLongExponent) { - runTest("1e2147483648", 12, 0x7f800000, ERANGE); - runTest("1e2147483646", 12, 0x7f800000, ERANGE); - runTest("100e2147483646", 14, 0x7f800000, ERANGE); - runTest("1e-2147483647", 13, 0x0, ERANGE); - runTest("1e-2147483649", 13, 0x0, ERANGE); + run_test("1e2147483648", 12, 0x7f800000, ERANGE); + run_test("1e2147483646", 12, 0x7f800000, ERANGE); + run_test("100e2147483646", 14, 0x7f800000, ERANGE); + run_test("1e-2147483647", 13, 0x0, ERANGE); + run_test("1e-2147483649", 13, 0x0, ERANGE); } TEST_F(LlvmLibcStrToFTest, BasicHexadecimalTests) { - runTest("0x1", 3, 0x3f800000); - runTest("0x10", 4, 0x41800000); - runTest("0x11", 4, 0x41880000); - runTest("0x0.1234", 8, 0x3d91a000); + run_test("0x1", 3, 0x3f800000); + run_test("0x10", 4, 0x41800000); + run_test("0x11", 4, 0x41880000); + run_test("0x0.1234", 8, 0x3d91a000); } TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalTests) { - runTest("0x0.0000000000000000000000000000000002", 38, 0x4000, ERANGE); + run_test("0x0.0000000000000000000000000000000002", 38, 0x4000, ERANGE); // This is the largest subnormal number as represented in hex - runTest("0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff, ERANGE); + run_test("0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff, ERANGE); } TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalRoundingTests) { // This is the largest subnormal number that gets rounded down to 0 (as a // float) - runTest("0x0.00000000000000000000000000000000000004", 42, 0x0, ERANGE); + run_test("0x0.00000000000000000000000000000000000004", 42, 0x0, ERANGE); // This is slightly larger, and thus rounded up - runTest("0x0.000000000000000000000000000000000000041", 43, 0x00000001, - ERANGE); + run_test("0x0.000000000000000000000000000000000000041", 43, 0x00000001, + ERANGE); // These check that we're rounding to even properly - runTest("0x0.0000000000000000000000000000000000000b", 42, 0x00000001, ERANGE); - 
runTest("0x0.0000000000000000000000000000000000000c", 42, 0x00000002, ERANGE); + run_test("0x0.0000000000000000000000000000000000000b", 42, 0x00000001, + ERANGE); + run_test("0x0.0000000000000000000000000000000000000c", 42, 0x00000002, + ERANGE); // These check that we're rounding to even properly even when the input bits // are longer than the bit fields can contain. - runTest("0x1.000000000000000000000p-150", 30, 0x00000000, ERANGE); - runTest("0x1.000010000000000001000p-150", 30, 0x00000001, ERANGE); - runTest("0x1.000100000000000001000p-134", 30, 0x00008001, ERANGE); - runTest("0x1.FFFFFC000000000001000p-127", 30, 0x007FFFFF, ERANGE); - runTest("0x1.FFFFFE000000000000000p-127", 30, 0x00800000); + run_test("0x1.000000000000000000000p-150", 30, 0x00000000, ERANGE); + run_test("0x1.000010000000000001000p-150", 30, 0x00000001, ERANGE); + run_test("0x1.000100000000000001000p-134", 30, 0x00008001, ERANGE); + run_test("0x1.FFFFFC000000000001000p-127", 30, 0x007FFFFF, ERANGE); + run_test("0x1.FFFFFE000000000000000p-127", 30, 0x00800000); } TEST_F(LlvmLibcStrToFTest, HexadecimalNormalRoundingTests) { // This also checks the round to even behavior by checking three adjacent // numbers. // This gets rounded down to even - runTest("0x123456500", 11, 0x4f91a2b2); + run_test("0x123456500", 11, 0x4f91a2b2); // This doesn't get rounded at all - runTest("0x123456600", 11, 0x4f91a2b3); + run_test("0x123456600", 11, 0x4f91a2b3); // This gets rounded up to even - runTest("0x123456700", 11, 0x4f91a2b4); + run_test("0x123456700", 11, 0x4f91a2b4); // Correct rounding for long input - runTest("0x1.000001000000000000000", 25, 0x3f800000); - runTest("0x1.000001000000000000100", 25, 0x3f800001); + run_test("0x1.000001000000000000000", 25, 0x3f800000); + run_test("0x1.000001000000000000100", 25, 0x3f800001); } TEST_F(LlvmLibcStrToFTest, HexadecimalsWithRoundingProblems) { - runTest("0xFFFFFFFF", 10, 0x4f800000); + run_test("0xFFFFFFFF", 10, 0x4f800000); } TEST_F(LlvmLibcStrToFTest, HexadecimalOutOfRangeTests) { - runTest("0x123456789123456789123456789123456789", 38, 0x7f800000, ERANGE); - runTest("-0x123456789123456789123456789123456789", 39, 0xff800000, ERANGE); - runTest("0x0.00000000000000000000000000000000000001", 42, 0x0, ERANGE); + run_test("0x123456789123456789123456789123456789", 38, 0x7f800000, ERANGE); + run_test("-0x123456789123456789123456789123456789", 39, 0xff800000, ERANGE); + run_test("0x0.00000000000000000000000000000000000001", 42, 0x0, ERANGE); } TEST_F(LlvmLibcStrToFTest, InfTests) { - runTest("INF", 3, 0x7f800000); - runTest("INFinity", 8, 0x7f800000); - runTest("infnity", 3, 0x7f800000); - runTest("infinit", 3, 0x7f800000); - runTest("infinfinit", 3, 0x7f800000); - runTest("innf", 0, 0x0); - runTest("-inf", 4, 0xff800000); - runTest("-iNfInItY", 9, 0xff800000); + run_test("INF", 3, 0x7f800000); + run_test("INFinity", 8, 0x7f800000); + run_test("infnity", 3, 0x7f800000); + run_test("infinit", 3, 0x7f800000); + run_test("infinfinit", 3, 0x7f800000); + run_test("innf", 0, 0x0); + run_test("-inf", 4, 0xff800000); + run_test("-iNfInItY", 9, 0xff800000); } TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) { - runTest("NaN", 3, 0x7fc00000); - runTest("-nAn", 4, 0xffc00000); + run_test("NaN", 3, 0x7fc00000); + run_test("-nAn", 4, 0xffc00000); } // These NaNs are of the form `NaN(n-character-sequence)` where the @@ -178,26 +180,26 @@ TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) { // the result is put into the mantissa if it takes up the whole width of the // parentheses. 
TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesEmptyTest) { - runTest("NaN()", 5, 0x7fc00000); + run_test("NaN()", 5, 0x7fc00000); } TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidNumberTests) { - runTest("NaN(1234)", 9, 0x7fc004d2); - runTest("NaN(0x1234)", 11, 0x7fc01234); - runTest("NaN(01234)", 10, 0x7fc0029c); + run_test("NaN(1234)", 9, 0x7fc004d2); + run_test("NaN(0x1234)", 11, 0x7fc01234); + run_test("NaN(01234)", 10, 0x7fc0029c); } TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesInvalidSequenceTests) { - runTest("NaN( 1234)", 3, 0x7fc00000); - runTest("NaN(-1234)", 3, 0x7fc00000); - runTest("NaN(asd&f)", 3, 0x7fc00000); - runTest("NaN(123 )", 3, 0x7fc00000); - runTest("NaN(123+asdf)", 3, 0x7fc00000); - runTest("NaN(123", 3, 0x7fc00000); + run_test("NaN( 1234)", 3, 0x7fc00000); + run_test("NaN(-1234)", 3, 0x7fc00000); + run_test("NaN(asd&f)", 3, 0x7fc00000); + run_test("NaN(123 )", 3, 0x7fc00000); + run_test("NaN(123+asdf)", 3, 0x7fc00000); + run_test("NaN(123", 3, 0x7fc00000); } TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidSequenceInvalidNumberTests) { - runTest("NaN(1a)", 7, 0x7fc00000); - runTest("NaN(asdf)", 9, 0x7fc00000); - runTest("NaN(1A1)", 8, 0x7fc00000); + run_test("NaN(1a)", 7, 0x7fc00000); + run_test("NaN(asdf)", 9, 0x7fc00000); + run_test("NaN(1A1)", 8, 0x7fc00000); } diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp index c1d41bc302f5..0aa8f45e623d 100644 --- a/libc/test/src/stdlib/strtold_test.cpp +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -17,12 +17,12 @@ class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { public: - void runTest(const char *inputString, const ptrdiff_t expectedStrLen, - const uint64_t expectedRawData64, - const __uint128_t expectedRawData80, - const __uint128_t expectedRawData128, - const int expectedErrno64 = 0, const int expectedErrno80 = 0, - const int expectedErrno128 = 0) { + void run_test(const char *inputString, const ptrdiff_t expectedStrLen, + const uint64_t expectedRawData64, + const __uint128_t expectedRawData80, + const __uint128_t expectedRawData128, + const int expectedErrno64 = 0, const int expectedErrno80 = 0, + const int expectedErrno128 = 0) { // expectedRawData64 is the expected long double result as a uint64_t, // organized according to the IEEE754 double precision format: // @@ -61,16 +61,16 @@ class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { // +------+------+ // | // +-- 15 Exponent Bits - char *strEnd = nullptr; + char *str_end = nullptr; #if defined(LONG_DOUBLE_IS_DOUBLE) __llvm_libc::fputil::FPBits expectedFP = __llvm_libc::fputil::FPBits(expectedRawData64); const int expectedErrno = expectedErrno64; #elif defined(SPECIAL_X86_LONG_DOUBLE) - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(expectedRawData80); - const int expectedErrno = expectedErrno80; + const int expected_errno = expectedErrno80; #else __llvm_libc::fputil::FPBits expectedFP = __llvm_libc::fputil::FPBits(expectedRawData128); @@ -78,144 +78,146 @@ class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { #endif errno = 0; - long double result = __llvm_libc::strtold(inputString, &strEnd); + long double result = __llvm_libc::strtold(inputString, &str_end); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(); - actualFP = __llvm_libc::fputil::FPBits(result); + actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(strEnd - inputString, expectedStrLen); 
+ EXPECT_EQ(str_end - inputString, expectedStrLen); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); - EXPECT_EQ(errno, expectedErrno); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); + EXPECT_EQ(errno, expected_errno); } }; TEST_F(LlvmLibcStrToLDTest, SimpleTest) { - runTest("123", 3, uint64_t(0x405ec00000000000), - __uint128_t(0x4005f60000) << 40, - __uint128_t(0x4005ec0000000000) << 64); + run_test("123", 3, uint64_t(0x405ec00000000000), + __uint128_t(0x4005f60000) << 40, + __uint128_t(0x4005ec0000000000) << 64); // This should fail on Eisel-Lemire, forcing a fallback to simple decimal // conversion. - runTest("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8), - (__uint128_t(0x403eab54a9) << 40) + __uint128_t(0x8ceb1ec400), - (__uint128_t(0x403e56a95319d63d) << 64) + - __uint128_t(0x8800000000000000)); + run_test("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8), + (__uint128_t(0x403eab54a9) << 40) + __uint128_t(0x8ceb1ec400), + (__uint128_t(0x403e56a95319d63d) << 64) + + __uint128_t(0x8800000000000000)); // Found while looking for difficult test cases here: // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt - runTest("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb), - (__uint128_t(0x4062dc3bcf) << 40) + __uint128_t(0x923a6fd402), - (__uint128_t(0x4062b8779f2474df) << 64) + - __uint128_t(0xa804bfd8c6d5c000)); - - runTest("0x123", 5, uint64_t(0x4072300000000000), - (__uint128_t(0x4007918000) << 40), - (__uint128_t(0x4007230000000000) << 64)); + run_test("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb), + (__uint128_t(0x4062dc3bcf) << 40) + __uint128_t(0x923a6fd402), + (__uint128_t(0x4062b8779f2474df) << 64) + + __uint128_t(0xa804bfd8c6d5c000)); + + run_test("0x123", 5, uint64_t(0x4072300000000000), + (__uint128_t(0x4007918000) << 40), + (__uint128_t(0x4007230000000000) << 64)); } // These are tests that have caused problems for doubles in the past. 
TEST_F(LlvmLibcStrToLDTest, Float64SpecificFailures) { - runTest("3E70000000000000", 16, uint64_t(0x7FF0000000000000), - (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7fff000000000000) << 64), ERANGE, ERANGE, ERANGE); - runTest("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9), - (__uint128_t(0x3fadddd953) << 40) + __uint128_t(0x464e85c400), - (__uint128_t(0x3fadbbb2a68c9d0b) << 64) + - __uint128_t(0x8800e7969e1c5fc8)); - runTest( + run_test("3E70000000000000", 16, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64), ERANGE, ERANGE, ERANGE); + run_test("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9), + (__uint128_t(0x3fadddd953) << 40) + __uint128_t(0x464e85c400), + (__uint128_t(0x3fadbbb2a68c9d0b) << 64) + + __uint128_t(0x8800e7969e1c5fc8)); + run_test( "2.16656806400000023841857910156251e9", 36, uint64_t(0x41e0246690000001), (__uint128_t(0x401e812334) << 40) + __uint128_t(0x8000000400), (__uint128_t(0x401e024669000000) << 64) + __uint128_t(0x800000000000018)); - runTest("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9), - (__uint128_t(0x403fc1f099) << 40) + __uint128_t(0x5e30464402), - (__uint128_t(0x403f83e132bc608c) << 64) + - __uint128_t(0x8803000000000000)); + run_test("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9), + (__uint128_t(0x403fc1f099) << 40) + __uint128_t(0x5e30464402), + (__uint128_t(0x403f83e132bc608c) << 64) + + __uint128_t(0x8803000000000000)); } TEST_F(LlvmLibcStrToLDTest, MaxSizeNumbers) { - runTest("1.1897314953572317650e4932", 26, uint64_t(0x7FF0000000000000), - (__uint128_t(0x7ffeffffff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7ffeffffffffffff) << 64) + - __uint128_t(0xfffd57322e3f8675), - ERANGE, 0, 0); - runTest("1.18973149535723176508e4932", 27, uint64_t(0x7FF0000000000000), - (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7ffeffffffffffff) << 64) + - __uint128_t(0xffffd2478338036c), - ERANGE, ERANGE, 0); + run_test("1.1897314953572317650e4932", 26, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7ffeffffff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7ffeffffffffffff) << 64) + + __uint128_t(0xfffd57322e3f8675), + ERANGE, 0, 0); + run_test("1.18973149535723176508e4932", 27, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7ffeffffffffffff) << 64) + + __uint128_t(0xffffd2478338036c), + ERANGE, ERANGE, 0); } // These tests check subnormal behavior for 80 bit and 128 bit floats. They will // be too small for 64 bit floats. 
TEST_F(LlvmLibcStrToLDTest, SubnormalTests) { - runTest("1e-4950", 7, uint64_t(0), (__uint128_t(0x00000000000000000003)), - (__uint128_t(0x000000000000000000057c9647e1a018)), ERANGE, ERANGE, - ERANGE); - runTest("1.89e-4951", 10, uint64_t(0), (__uint128_t(0x00000000000000000001)), - (__uint128_t(0x0000000000000000000109778a006738)), ERANGE, ERANGE, - ERANGE); - runTest("4e-4966", 7, uint64_t(0), (__uint128_t(0)), - (__uint128_t(0x00000000000000000000000000000001)), ERANGE, ERANGE, - ERANGE); + run_test("1e-4950", 7, uint64_t(0), (__uint128_t(0x00000000000000000003)), + (__uint128_t(0x000000000000000000057c9647e1a018)), ERANGE, ERANGE, + ERANGE); + run_test("1.89e-4951", 10, uint64_t(0), (__uint128_t(0x00000000000000000001)), + (__uint128_t(0x0000000000000000000109778a006738)), ERANGE, ERANGE, + ERANGE); + run_test("4e-4966", 7, uint64_t(0), (__uint128_t(0)), + (__uint128_t(0x00000000000000000000000000000001)), ERANGE, ERANGE, + ERANGE); } TEST_F(LlvmLibcStrToLDTest, SmallNormalTests) { - runTest("3.37e-4932", 10, uint64_t(0), - (__uint128_t(0x1804cf7) << 40) + __uint128_t(0x908850712), - (__uint128_t(0x10099ee12110a) << 64) + __uint128_t(0xe24b75c0f50dc0c), - ERANGE, 0, 0); + run_test("3.37e-4932", 10, uint64_t(0), + (__uint128_t(0x1804cf7) << 40) + __uint128_t(0x908850712), + (__uint128_t(0x10099ee12110a) << 64) + + __uint128_t(0xe24b75c0f50dc0c), + ERANGE, 0, 0); } TEST_F(LlvmLibcStrToLDTest, ComplexHexadecimalTests) { - runTest("0x1p16383", 9, 0x7ff0000000000000, (__uint128_t(0x7ffe800000) << 40), - (__uint128_t(0x7ffe000000000000) << 64)); - runTest("0x123456789abcdef", 17, 0x43723456789abcdf, - (__uint128_t(0x403791a2b3) << 40) + __uint128_t(0xc4d5e6f780), - (__uint128_t(0x403723456789abcd) << 64) + - __uint128_t(0xef00000000000000)); - runTest("0x123456789abcdef0123456789ABCDEF", 33, 0x7ff0000000000000, - (__uint128_t(0x407791a2b3) << 40) + __uint128_t(0xc4d5e6f781), - (__uint128_t(0x407723456789abcd) << 64) + - __uint128_t(0xef0123456789abce)); + run_test("0x1p16383", 9, 0x7ff0000000000000, + (__uint128_t(0x7ffe800000) << 40), + (__uint128_t(0x7ffe000000000000) << 64)); + run_test("0x123456789abcdef", 17, 0x43723456789abcdf, + (__uint128_t(0x403791a2b3) << 40) + __uint128_t(0xc4d5e6f780), + (__uint128_t(0x403723456789abcd) << 64) + + __uint128_t(0xef00000000000000)); + run_test("0x123456789abcdef0123456789ABCDEF", 33, 0x7ff0000000000000, + (__uint128_t(0x407791a2b3) << 40) + __uint128_t(0xc4d5e6f781), + (__uint128_t(0x407723456789abcd) << 64) + + __uint128_t(0xef0123456789abce)); } TEST_F(LlvmLibcStrToLDTest, InfTests) { - runTest("INF", 3, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7fff000000000000) << 64)); - runTest("INFinity", 8, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7fff000000000000) << 64)); - runTest("-inf", 4, 0xfff0000000000000, (__uint128_t(0xffff800000) << 40), - (__uint128_t(0xffff000000000000) << 64)); + run_test("INF", 3, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64)); + run_test("INFinity", 8, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64)); + run_test("-inf", 4, 0xfff0000000000000, (__uint128_t(0xffff800000) << 40), + (__uint128_t(0xffff000000000000) << 64)); } TEST_F(LlvmLibcStrToLDTest, NaNTests) { - runTest("NaN", 3, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), - (__uint128_t(0x7fff800000000000) << 64)); - runTest("-nAn", 4, 0xfff8000000000000, (__uint128_t(0xffffc00000) << 40), - 
(__uint128_t(0xffff800000000000) << 64)); - runTest("NaN()", 5, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), - (__uint128_t(0x7fff800000000000) << 64)); - runTest("NaN(1234)", 9, 0x7ff80000000004d2, - (__uint128_t(0x7fffc00000) << 40) + __uint128_t(0x4d2), - (__uint128_t(0x7fff800000000000) << 64) + __uint128_t(0x4d2)); - runTest("NaN(0xffffffffffff)", 19, 0x7ff8ffffffffffff, - (__uint128_t(0x7fffc000ff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7fff800000000000) << 64) + - __uint128_t(0xffffffffffff)); - runTest("NaN(0xfffffffffffff)", 20, 0x7fffffffffffffff, - (__uint128_t(0x7fffc00fff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7fff800000000000) << 64) + - __uint128_t(0xfffffffffffff)); - runTest("NaN(0xffffffffffffffff)", 23, 0x7fffffffffffffff, - (__uint128_t(0x7fffffffff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7fff800000000000) << 64) + - __uint128_t(0xffffffffffffffff)); - runTest("NaN( 1234)", 3, 0x7ff8000000000000, - (__uint128_t(0x7fffc00000) << 40), - (__uint128_t(0x7fff800000000000) << 64)); + run_test("NaN", 3, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); + run_test("-nAn", 4, 0xfff8000000000000, (__uint128_t(0xffffc00000) << 40), + (__uint128_t(0xffff800000000000) << 64)); + run_test("NaN()", 5, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); + run_test("NaN(1234)", 9, 0x7ff80000000004d2, + (__uint128_t(0x7fffc00000) << 40) + __uint128_t(0x4d2), + (__uint128_t(0x7fff800000000000) << 64) + __uint128_t(0x4d2)); + run_test("NaN(0xffffffffffff)", 19, 0x7ff8ffffffffffff, + (__uint128_t(0x7fffc000ff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xffffffffffff)); + run_test("NaN(0xfffffffffffff)", 20, 0x7fffffffffffffff, + (__uint128_t(0x7fffc00fff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xfffffffffffff)); + run_test("NaN(0xffffffffffffffff)", 23, 0x7fffffffffffffff, + (__uint128_t(0x7fffffffff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xffffffffffffffff)); + run_test("NaN( 1234)", 3, 0x7ff8000000000000, + (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); } diff --git a/libc/test/src/string/bcmp_test.cpp b/libc/test/src/string/bcmp_test.cpp index dfcad7aff259..19df7ad2637a 100644 --- a/libc/test/src/string/bcmp_test.cpp +++ b/libc/test/src/string/bcmp_test.cpp @@ -34,25 +34,25 @@ TEST(LlvmLibcBcmpTest, LhsAfterRhsLexically) { } TEST(LlvmLibcBcmpTest, Sweep) { - static constexpr size_t kMaxSize = 1024; - char lhs[kMaxSize]; - char rhs[kMaxSize]; + static constexpr size_t K_MAX_SIZE = 1024; + char lhs[K_MAX_SIZE]; + char rhs[K_MAX_SIZE]; const auto reset = [](char *const ptr) { - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) ptr[i] = 'a'; }; reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) EXPECT_EQ(__llvm_libc::bcmp(lhs, rhs, i), 0); reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) { + for (size_t i = 0; i < K_MAX_SIZE; ++i) { rhs[i] = 'b'; - EXPECT_NE(__llvm_libc::bcmp(lhs, rhs, kMaxSize), 0); + EXPECT_NE(__llvm_libc::bcmp(lhs, rhs, K_MAX_SIZE), 0); rhs[i] = 'a'; } } diff --git a/libc/test/src/string/bzero_test.cpp b/libc/test/src/string/bzero_test.cpp index a997cbc43f46..f8edfa542d77 100644 --- a/libc/test/src/string/bzero_test.cpp +++ 
b/libc/test/src/string/bzero_test.cpp @@ -14,10 +14,10 @@ using __llvm_libc::cpp::Array; using __llvm_libc::cpp::ArrayRef; using Data = Array; -static const ArrayRef kDeadcode("DEADC0DE", 8); +static const ArrayRef k_deadcode("DEADC0DE", 8); // Returns a Data object filled with a repetition of `filler`. -Data getData(ArrayRef filler) { +Data get_data(ArrayRef filler) { Data out; for (size_t i = 0; i < out.size(); ++i) out[i] = filler[i % filler.size()]; @@ -25,7 +25,7 @@ Data getData(ArrayRef filler) { } TEST(LlvmLibcBzeroTest, Thorough) { - const Data dirty = getData(kDeadcode); + const Data dirty = get_data(k_deadcode); for (size_t count = 0; count < 1024; ++count) { for (size_t align = 0; align < 64; ++align) { auto buffer = dirty; diff --git a/libc/test/src/string/memcmp_test.cpp b/libc/test/src/string/memcmp_test.cpp index 35ee741514aa..0cd33ebdaf85 100644 --- a/libc/test/src/string/memcmp_test.cpp +++ b/libc/test/src/string/memcmp_test.cpp @@ -34,25 +34,25 @@ TEST(LlvmLibcMemcmpTest, LhsAfterRhsLexically) { } TEST(LlvmLibcMemcmpTest, Sweep) { - static constexpr size_t kMaxSize = 1024; - char lhs[kMaxSize]; - char rhs[kMaxSize]; + static constexpr size_t K_MAX_SIZE = 1024; + char lhs[K_MAX_SIZE]; + char rhs[K_MAX_SIZE]; const auto reset = [](char *const ptr) { - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) ptr[i] = 'a'; }; reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) EXPECT_EQ(__llvm_libc::memcmp(lhs, rhs, i), 0); reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) { + for (size_t i = 0; i < K_MAX_SIZE; ++i) { rhs[i] = 'z'; - EXPECT_LT(__llvm_libc::memcmp(lhs, rhs, kMaxSize), 0); + EXPECT_LT(__llvm_libc::memcmp(lhs, rhs, K_MAX_SIZE), 0); rhs[i] = 'a'; } } diff --git a/libc/test/src/string/memcpy_test.cpp b/libc/test/src/string/memcpy_test.cpp index 9a7c9ba7f7a0..ed4bbd45c410 100644 --- a/libc/test/src/string/memcpy_test.cpp +++ b/libc/test/src/string/memcpy_test.cpp @@ -14,11 +14,11 @@ using __llvm_libc::cpp::Array; using __llvm_libc::cpp::ArrayRef; using Data = Array; -static const ArrayRef kNumbers("0123456789", 10); -static const ArrayRef kDeadcode("DEADC0DE", 8); +static const ArrayRef k_numbers("0123456789", 10); +static const ArrayRef k_deadcode("DEADC0DE", 8); // Returns a Data object filled with a repetition of `filler`. -Data getData(ArrayRef filler) { +Data get_data(ArrayRef filler) { Data out; for (size_t i = 0; i < out.size(); ++i) out[i] = filler[i % filler.size()]; @@ -26,8 +26,8 @@ Data getData(ArrayRef filler) { } TEST(LlvmLibcMemcpyTest, Thorough) { - const Data groundtruth = getData(kNumbers); - const Data dirty = getData(kDeadcode); + const Data groundtruth = get_data(k_numbers); + const Data dirty = get_data(k_deadcode); for (size_t count = 0; count < 1024; ++count) { for (size_t align = 0; align < 64; ++align) { auto buffer = dirty; diff --git a/libc/test/src/string/memset_test.cpp b/libc/test/src/string/memset_test.cpp index c3f65cf6fd6c..2d07e7bb0c73 100644 --- a/libc/test/src/string/memset_test.cpp +++ b/libc/test/src/string/memset_test.cpp @@ -14,10 +14,10 @@ using __llvm_libc::cpp::Array; using __llvm_libc::cpp::ArrayRef; using Data = Array; -static const ArrayRef kDeadcode("DEADC0DE", 8); +static const ArrayRef k_deadcode("DEADC0DE", 8); // Returns a Data object filled with a repetition of `filler`. 
-Data getData(ArrayRef filler) { +Data get_data(ArrayRef filler) { Data out; for (size_t i = 0; i < out.size(); ++i) out[i] = filler[i % filler.size()]; @@ -25,7 +25,7 @@ Data getData(ArrayRef filler) { } TEST(LlvmLibcMemsetTest, Thorough) { - const Data dirty = getData(kDeadcode); + const Data dirty = get_data(k_deadcode); for (int value = -1; value <= 1; ++value) { for (size_t count = 0; count < 1024; ++count) { for (size_t align = 0; align < 64; ++align) { diff --git a/libc/test/src/string/stpcpy_test.cpp b/libc/test/src/string/stpcpy_test.cpp index 90ec5311b9b9..e5ed3fb06f78 100644 --- a/libc/test/src/string/stpcpy_test.cpp +++ b/libc/test/src/string/stpcpy_test.cpp @@ -13,33 +13,33 @@ TEST(LlvmLibcStpCpyTest, EmptySrc) { const char *empty = ""; - size_t srcSize = __llvm_libc::internal::string_length(empty); + size_t src_size = __llvm_libc::internal::string_length(empty); char dest[4] = {'a', 'b', 'c', '\0'}; char *result = __llvm_libc::stpcpy(dest, empty); - ASSERT_EQ(dest + srcSize, result); + ASSERT_EQ(dest + src_size, result); ASSERT_EQ(result[0], '\0'); ASSERT_STREQ(dest, empty); } TEST(LlvmLibcStpCpyTest, EmptyDest) { const char *abc = "abc"; - size_t srcSize = __llvm_libc::internal::string_length(abc); + size_t src_size = __llvm_libc::internal::string_length(abc); char dest[4]; char *result = __llvm_libc::stpcpy(dest, abc); - ASSERT_EQ(dest + srcSize, result); + ASSERT_EQ(dest + src_size, result); ASSERT_EQ(result[0], '\0'); ASSERT_STREQ(dest, abc); } TEST(LlvmLibcStpCpyTest, OffsetDest) { const char *abc = "abc"; - size_t srcSize = __llvm_libc::internal::string_length(abc); + size_t src_size = __llvm_libc::internal::string_length(abc); char dest[7] = {'x', 'y', 'z'}; char *result = __llvm_libc::stpcpy(dest + 3, abc); - ASSERT_EQ(dest + 3 + srcSize, result); + ASSERT_EQ(dest + 3 + src_size, result); ASSERT_EQ(result[0], '\0'); ASSERT_STREQ(dest, "xyzabc"); } diff --git a/libc/test/src/threads/call_once_test.cpp b/libc/test/src/threads/call_once_test.cpp index 18ed71015763..b0e9fb8ccc71 100644 --- a/libc/test/src/threads/call_once_test.cpp +++ b/libc/test/src/threads/call_once_test.cpp @@ -18,7 +18,7 @@ #include -static constexpr unsigned int num_threads = 5; +static constexpr unsigned int NUM_THREADS = 5; static atomic_uint thread_count; static unsigned int call_count; @@ -38,13 +38,13 @@ TEST(LlvmLibcCallOnceTest, CallFrom5Threads) { call_count = 0; thread_count = 0; - thrd_t threads[num_threads]; - for (unsigned int i = 0; i < num_threads; ++i) { + thrd_t threads[NUM_THREADS]; + for (unsigned int i = 0; i < NUM_THREADS; ++i) { ASSERT_EQ(__llvm_libc::thrd_create(threads + i, func, nullptr), static_cast(thrd_success)); } - for (unsigned int i = 0; i < num_threads; ++i) { + for (unsigned int i = 0; i < NUM_THREADS; ++i) { int retval; ASSERT_EQ(__llvm_libc::thrd_join(threads + i, &retval), static_cast(thrd_success)); diff --git a/libc/test/src/threads/mtx_test.cpp b/libc/test/src/threads/mtx_test.cpp index 8ee8cf5bb0dd..3abe77e0eab7 100644 --- a/libc/test/src/threads/mtx_test.cpp +++ b/libc/test/src/threads/mtx_test.cpp @@ -69,11 +69,11 @@ TEST(LlvmLibcMutexTest, RelayCounter) { } mtx_t start_lock, step_lock; -bool start, step; +bool started, step; int stepper(void *arg) { __llvm_libc::mtx_lock(&start_lock); - start = true; + started = true; __llvm_libc::mtx_unlock(&start_lock); __llvm_libc::mtx_lock(&step_lock); @@ -92,7 +92,7 @@ TEST(LlvmLibcMutexTest, WaitAndStep) { // step. Once we ensure that the thread is blocked, we unblock it. 
// After unblocking, we then verify that the thread was indeed unblocked. step = false; - start = false; + started = false; ASSERT_EQ(__llvm_libc::mtx_lock(&step_lock), static_cast(thrd_success)); thrd_t thread; @@ -102,7 +102,7 @@ TEST(LlvmLibcMutexTest, WaitAndStep) { // Make sure the thread actually started. ASSERT_EQ(__llvm_libc::mtx_lock(&start_lock), static_cast(thrd_success)); - bool s = start; + bool s = started; ASSERT_EQ(__llvm_libc::mtx_unlock(&start_lock), static_cast(thrd_success)); if (s) diff --git a/libc/test/src/time/TmHelper.h b/libc/test/src/time/TmHelper.h index 1ab2a5eeeba6..54d9f4eb72ce 100644 --- a/libc/test/src/time/TmHelper.h +++ b/libc/test/src/time/TmHelper.h @@ -20,9 +20,9 @@ namespace tmhelper { namespace testing { // A helper function to initialize tm data structure. -static inline void InitializeTmData(struct tm *tm_data, int year, int month, - int mday, int hour, int min, int sec, - int wday, int yday) { +static inline void initialize_tm_data(struct tm *tm_data, int year, int month, + int mday, int hour, int min, int sec, + int wday, int yday) { struct tm temp = {.tm_sec = sec, .tm_min = min, .tm_hour = hour, diff --git a/libc/test/src/time/asctime_r_test.cpp b/libc/test/src/time/asctime_r_test.cpp index 1f883cf0cac1..f9ccdda50a32 100644 --- a/libc/test/src/time/asctime_r_test.cpp +++ b/libc/test/src/time/asctime_r_test.cpp @@ -17,8 +17,8 @@ using __llvm_libc::time_utils::TimeConstants; static inline char *call_asctime_r(struct tm *tm_data, int year, int month, int mday, int hour, int min, int sec, int wday, int yday, char *buffer) { - __llvm_libc::tmhelper::testing::InitializeTmData(tm_data, year, month, mday, - hour, min, sec, wday, yday); + __llvm_libc::tmhelper::testing::initialize_tm_data( + tm_data, year, month, mday, hour, min, sec, wday, yday); return __llvm_libc::asctime_r(tm_data, buffer); } diff --git a/libc/test/src/time/asctime_test.cpp b/libc/test/src/time/asctime_test.cpp index 14201fd5a0de..0b0c159eb53a 100644 --- a/libc/test/src/time/asctime_test.cpp +++ b/libc/test/src/time/asctime_test.cpp @@ -13,8 +13,8 @@ static inline char *call_asctime(struct tm *tm_data, int year, int month, int mday, int hour, int min, int sec, int wday, int yday) { - __llvm_libc::tmhelper::testing::InitializeTmData(tm_data, year, month, mday, - hour, min, sec, wday, yday); + __llvm_libc::tmhelper::testing::initialize_tm_data( + tm_data, year, month, mday, hour, min, sec, wday, yday); return __llvm_libc::asctime(tm_data); } diff --git a/libc/test/src/time/mktime_test.cpp b/libc/test/src/time/mktime_test.cpp index d08a6b9640dd..16b814f1d425 100644 --- a/libc/test/src/time/mktime_test.cpp +++ b/libc/test/src/time/mktime_test.cpp @@ -24,14 +24,14 @@ using __llvm_libc::time_utils::TimeConstants; static inline time_t call_mktime(struct tm *tm_data, int year, int month, int mday, int hour, int min, int sec, int wday, int yday) { - __llvm_libc::tmhelper::testing::InitializeTmData(tm_data, year, month, mday, - hour, min, sec, wday, yday); + __llvm_libc::tmhelper::testing::initialize_tm_data( + tm_data, year, month, mday, hour, min, sec, wday, yday); return __llvm_libc::mktime(tm_data); } TEST(LlvmLibcMkTime, FailureSetsErrno) { struct tm tm_data; - __llvm_libc::tmhelper::testing::InitializeTmData( + __llvm_libc::tmhelper::testing::initialize_tm_data( &tm_data, INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, -1, 0, 0); EXPECT_THAT(__llvm_libc::mktime(&tm_data), Fails(EOVERFLOW)); } diff --git a/libc/test/src/unistd/write_test.cpp b/libc/test/src/unistd/write_test.cpp index 
2313d97d5888..56243846083a 100644 --- a/libc/test/src/unistd/write_test.cpp +++ b/libc/test/src/unistd/write_test.cpp @@ -14,10 +14,10 @@ TEST(LlvmLibcUniStd, WriteBasic) { using __llvm_libc::testing::ErrnoSetterMatcher::Succeeds; - constexpr const char *hello = "hello"; + constexpr const char *HELLO = "hello"; __llvm_libc::testutils::FDReader reader; - EXPECT_THAT(__llvm_libc::write(reader.getWriteFD(), hello, 5), Succeeds(5)); - EXPECT_TRUE(reader.matchWritten(hello)); + EXPECT_THAT(__llvm_libc::write(reader.get_write_fd(), HELLO, 5), Succeeds(5)); + EXPECT_TRUE(reader.match_written(HELLO)); } TEST(LlvmLibcUniStd, WriteFails) { diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 7c4f061b47b5..4e25c874d122 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -38,73 +38,77 @@ namespace mpfr { template struct Precision; template <> struct Precision { - static constexpr unsigned int value = 24; + static constexpr unsigned int VALUE = 24; }; template <> struct Precision { - static constexpr unsigned int value = 53; + static constexpr unsigned int VALUE = 53; }; #if !(defined(LLVM_LIBC_ARCH_X86)) template <> struct Precision { - static constexpr unsigned int value = 64; + static constexpr unsigned int VALUE = 64; }; #else template <> struct Precision { - static constexpr unsigned int value = 113; + static constexpr unsigned int VALUE = 113; }; #endif class MPFRNumber { // A precision value which allows sufficiently large additional // precision even compared to quad-precision floating point values. - unsigned int mpfrPrecision; + unsigned int mpfr_precision; mpfr_t value; public: - MPFRNumber() : mpfrPrecision(256) { mpfr_init2(value, mpfrPrecision); } + MPFRNumber() : mpfr_precision(256) { mpfr_init2(value, mpfr_precision); } // We use explicit EnableIf specializations to disallow implicit // conversions. Implicit conversions can potentially lead to loss of // precision. 
template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_flt(value, x, MPFR_RNDN); } template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_d(value, x, MPFR_RNDN); } template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_ld(value, x, MPFR_RNDN); } template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_sj(value, x, MPFR_RNDN); } - MPFRNumber(const MPFRNumber &other) : mpfrPrecision(other.mpfrPrecision) { - mpfr_init2(value, mpfrPrecision); + MPFRNumber(const MPFRNumber &other) : mpfr_precision(other.mpfr_precision) { + mpfr_init2(value, mpfr_precision); mpfr_set(value, other.value, MPFR_RNDN); } ~MPFRNumber() { mpfr_clear(value); } MPFRNumber &operator=(const MPFRNumber &rhs) { - mpfrPrecision = rhs.mpfrPrecision; + mpfr_precision = rhs.mpfr_precision; mpfr_set(value, rhs.value, MPFR_RNDN); return *this; } @@ -185,7 +189,7 @@ class MPFRNumber { return result; } - bool roundToLong(long &result) const { + bool round_to_long(long &result) const { // We first calculate the rounded value. This way, when converting // to long using mpfr_get_si, the rounding direction of MPFR_RNDN // (or any other rounding mode), does not have an influence.
@@ -195,10 +199,10 @@ class MPFRNumber { return mpfr_erangeflag_p(); } - bool roundToLong(mpfr_rnd_t rnd, long &result) const { + bool round_to_long(mpfr_rnd_t rnd, long &result) const { MPFRNumber rint_result; mpfr_rint(rint_result.value, value, rnd); - return rint_result.roundToLong(result); + return rint_result.round_to_long(result); } MPFRNumber rint(mpfr_rnd_t rnd) const { @@ -374,7 +378,7 @@ namespace internal { template cpp::EnableIfType::Value, MPFRNumber> -unaryOperation(Operation op, InputType input) { +unary_operation(Operation op, InputType input) { MPFRNumber mpfrInput(input); switch (op) { case Operation::Abs: @@ -416,7 +420,7 @@ unaryOperation(Operation op, InputType input) { template cpp::EnableIfType::Value, MPFRNumber> -unaryOperationTwoOutputs(Operation op, InputType input, int &output) { +unary_operation_two_outputs(Operation op, InputType input, int &output) { MPFRNumber mpfrInput(input); switch (op) { case Operation::Frexp: @@ -428,7 +432,7 @@ unaryOperationTwoOutputs(Operation op, InputType input, int &output) { template cpp::EnableIfType::Value, MPFRNumber> -binaryOperationOneOutput(Operation op, InputType x, InputType y) { +binary_operation_one_output(Operation op, InputType x, InputType y) { MPFRNumber inputX(x), inputY(y); switch (op) { case Operation::Hypot: @@ -440,7 +444,7 @@ binaryOperationOneOutput(Operation op, InputType x, InputType y) { template cpp::EnableIfType::Value, MPFRNumber> -binaryOperationTwoOutputs(Operation op, InputType x, InputType y, int &output) { +binary_operation_two_outputs(Operation op, InputType x, InputType y, + int &output) { MPFRNumber inputX(x), inputY(y); switch (op) { case Operation::RemQuo: @@ -452,11 +457,12 @@ binaryOperationTwoOutputs(Operation op, InputType x, InputType y, int &output) { template cpp::EnableIfType::Value, MPFRNumber> -ternaryOperationOneOutput(Operation op, InputType x, InputType y, InputType z) { +ternary_operation_one_output(Operation op, InputType x, InputType y, + InputType z) { // For FMA function, we just need to compare with the mpfr_fma with the same // precision as InputType. Using higher precision as the intermediate results // to compare might incorrectly fail due to double-rounding errors.
- constexpr unsigned int prec = Precision::value; + constexpr unsigned int prec = Precision::VALUE; MPFRNumber inputX(x, prec), inputY(y, prec), inputZ(z, prec); switch (op) { case Operation::Fma: @@ -467,54 +473,55 @@ ternaryOperationOneOutput(Operation op, InputType x, InputType y, InputType z) { } template -void explainUnaryOperationSingleOutputError(Operation op, T input, T matchValue, - testutils::StreamWrapper &OS) { +void explain_unary_operation_single_output_error(Operation op, T input, + T matchValue, + testutils::StreamWrapper &OS) { MPFRNumber mpfrInput(input); - MPFRNumber mpfrResult = unaryOperation(op, input); + MPFRNumber mpfr_result = unary_operation(op, input); MPFRNumber mpfrMatchValue(matchValue); FPBits inputBits(input); FPBits matchBits(matchValue); - FPBits mpfrResultBits(mpfrResult.as()); + FPBits mpfr_resultBits(mpfr_result.as()); OS << "Match value not within tolerance value of MPFR result:\n" << " Input decimal: " << mpfrInput.str() << '\n'; __llvm_libc::fputil::testing::describeValue(" Input bits: ", input, OS); OS << '\n' << " Match decimal: " << mpfrMatchValue.str() << '\n'; __llvm_libc::fputil::testing::describeValue(" Match bits: ", matchValue, OS); - OS << '\n' << " MPFR result: " << mpfrResult.str() << '\n'; + OS << '\n' << " MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - " MPFR rounded: ", mpfrResult.as(), OS); + " MPFR rounded: ", mpfr_result.as(), OS); OS << '\n'; - OS << " ULP error: " << std::to_string(mpfrResult.ulp(matchValue)) + OS << " ULP error: " << std::to_string(mpfr_result.ulp(matchValue)) << '\n'; } template void -explainUnaryOperationSingleOutputError(Operation op, float, float, - testutils::StreamWrapper &); -template void -explainUnaryOperationSingleOutputError(Operation op, double, double, - testutils::StreamWrapper &); -template void explainUnaryOperationSingleOutputError( +explain_unary_operation_single_output_error(Operation op, float, float, + testutils::StreamWrapper &); +template void explain_unary_operation_single_output_error( + Operation op, double, double, testutils::StreamWrapper &); +template void explain_unary_operation_single_output_error( Operation op, long double, long double, testutils::StreamWrapper &); template -void explainUnaryOperationTwoOutputsError(Operation op, T input, - const BinaryOutput &libcResult, - testutils::StreamWrapper &OS) { +void explain_unary_operation_two_outputs_error( + Operation op, T input, const BinaryOutput &libc_result, + testutils::StreamWrapper &OS) { MPFRNumber mpfrInput(input); FPBits inputBits(input); int mpfrIntResult; - MPFRNumber mpfrResult = unaryOperationTwoOutputs(op, input, mpfrIntResult); + MPFRNumber mpfr_result = + unary_operation_two_outputs(op, input, mpfrIntResult); - if (mpfrIntResult != libcResult.i) { + if (mpfrIntResult != libc_result.i) { OS << "MPFR integral result: " << mpfrIntResult << '\n' - << "Libc integral result: " << libcResult.i << '\n'; + << "Libc integral result: " << libc_result.i << '\n'; } else { OS << "Integral result from libc matches integral result from MPFR.\n"; } - MPFRNumber mpfrMatchValue(libcResult.f); + MPFRNumber mpfrMatchValue(libc_result.f); OS << "Libc floating point result is not within tolerance value of the MPFR " << "result.\n\n"; @@ -522,74 +529,73 @@ void explainUnaryOperationTwoOutputsError(Operation op, T input, OS << "Libc floating point value: " << mpfrMatchValue.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - " Libc floating point bits: ", libcResult.f, OS); + " Libc 
floating point bits: ", libc_result.f, OS); OS << "\n\n"; - OS << " MPFR result: " << mpfrResult.str() << '\n'; + OS << " MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - " MPFR rounded: ", mpfrResult.as(), OS); + " MPFR rounded: ", mpfr_result.as(), OS); OS << '\n' << " ULP error: " - << std::to_string(mpfrResult.ulp(libcResult.f)) << '\n'; + << std::to_string(mpfr_result.ulp(libc_result.f)) << '\n'; } -template void explainUnaryOperationTwoOutputsError( +template void explain_unary_operation_two_outputs_error( Operation, float, const BinaryOutput &, testutils::StreamWrapper &); template void -explainUnaryOperationTwoOutputsError(Operation, double, - const BinaryOutput &, - testutils::StreamWrapper &); -template void explainUnaryOperationTwoOutputsError( +explain_unary_operation_two_outputs_error(Operation, double, + const BinaryOutput &, + testutils::StreamWrapper &); +template void explain_unary_operation_two_outputs_error( Operation, long double, const BinaryOutput &, testutils::StreamWrapper &); template -void explainBinaryOperationTwoOutputsError(Operation op, - const BinaryInput &input, - const BinaryOutput &libcResult, - testutils::StreamWrapper &OS) { +void explain_binary_operation_two_outputs_error( + Operation op, const BinaryInput &input, + const BinaryOutput &libc_result, testutils::StreamWrapper &OS) { MPFRNumber mpfrX(input.x); MPFRNumber mpfrY(input.y); FPBits xbits(input.x); FPBits ybits(input.y); int mpfrIntResult; - MPFRNumber mpfrResult = - binaryOperationTwoOutputs(op, input.x, input.y, mpfrIntResult); - MPFRNumber mpfrMatchValue(libcResult.f); + MPFRNumber mpfr_result = + binary_operation_two_outputs(op, input.x, input.y, mpfrIntResult); + MPFRNumber mpfrMatchValue(libc_result.f); OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << '\n' << "MPFR integral result: " << mpfrIntResult << '\n' - << "Libc integral result: " << libcResult.i << '\n' + << "Libc integral result: " << libc_result.i << '\n' << "Libc floating point result: " << mpfrMatchValue.str() << '\n' - << " MPFR result: " << mpfrResult.str() << '\n'; + << " MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - "Libc floating point result bits: ", libcResult.f, OS); + "Libc floating point result bits: ", libc_result.f, OS); __llvm_libc::fputil::testing::describeValue( - " MPFR rounded bits: ", mpfrResult.as(), OS); - OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult.f)) << '\n'; + " MPFR rounded bits: ", mpfr_result.as(), OS); + OS << "ULP error: " << std::to_string(mpfr_result.ulp(libc_result.f)) << '\n'; } -template void explainBinaryOperationTwoOutputsError( +template void explain_binary_operation_two_outputs_error( Operation, const BinaryInput &, const BinaryOutput &, testutils::StreamWrapper &); -template void explainBinaryOperationTwoOutputsError( +template void explain_binary_operation_two_outputs_error( Operation, const BinaryInput &, const BinaryOutput &, testutils::StreamWrapper &); -template void explainBinaryOperationTwoOutputsError( +template void explain_binary_operation_two_outputs_error( Operation, const BinaryInput &, const BinaryOutput &, testutils::StreamWrapper &); template -void explainBinaryOperationOneOutputError(Operation op, - const BinaryInput &input, - T libcResult, - testutils::StreamWrapper &OS) { +void explain_binary_operation_one_output_error(Operation op, + const BinaryInput &input, + T libc_result, + testutils::StreamWrapper &OS) { MPFRNumber mpfrX(input.x); 
MPFRNumber mpfrY(input.y); FPBits xbits(input.x); FPBits ybits(input.y); - MPFRNumber mpfrResult = binaryOperationOneOutput(op, input.x, input.y); - MPFRNumber mpfrMatchValue(libcResult); + MPFRNumber mpfr_result = binary_operation_one_output(op, input.x, input.y); + MPFRNumber mpfrMatchValue(libc_result); OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << '\n'; __llvm_libc::fputil::testing::describeValue("First input bits: ", input.x, @@ -598,36 +604,36 @@ void explainBinaryOperationOneOutputError(Operation op, OS); OS << "Libc result: " << mpfrMatchValue.str() << '\n' - << "MPFR result: " << mpfrResult.str() << '\n'; + << "MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - "Libc floating point result bits: ", libcResult, OS); + "Libc floating point result bits: ", libc_result, OS); __llvm_libc::fputil::testing::describeValue( - " MPFR rounded bits: ", mpfrResult.as(), OS); - OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult)) << '\n'; + " MPFR rounded bits: ", mpfr_result.as(), OS); + OS << "ULP error: " << std::to_string(mpfr_result.ulp(libc_result)) << '\n'; } -template void explainBinaryOperationOneOutputError( +template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, float, testutils::StreamWrapper &); -template void explainBinaryOperationOneOutputError( +template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, double, testutils::StreamWrapper &); -template void explainBinaryOperationOneOutputError( +template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, long double, testutils::StreamWrapper &); template -void explainTernaryOperationOneOutputError(Operation op, - const TernaryInput &input, - T libcResult, - testutils::StreamWrapper &OS) { - MPFRNumber mpfrX(input.x, Precision::value); - MPFRNumber mpfrY(input.y, Precision::value); - MPFRNumber mpfrZ(input.z, Precision::value); +void explain_ternary_operation_one_output_error(Operation op, + const TernaryInput &input, + T libc_result, + testutils::StreamWrapper &OS) { + MPFRNumber mpfrX(input.x, Precision::VALUE); + MPFRNumber mpfrY(input.y, Precision::VALUE); + MPFRNumber mpfrZ(input.z, Precision::VALUE); FPBits xbits(input.x); FPBits ybits(input.y); FPBits zbits(input.z); - MPFRNumber mpfrResult = - ternaryOperationOneOutput(op, input.x, input.y, input.z); - MPFRNumber mpfrMatchValue(libcResult); + MPFRNumber mpfr_result = + ternary_operation_one_output(op, input.x, input.y, input.z); + MPFRNumber mpfrMatchValue(libc_result); OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << " z: " << mpfrZ.str() << '\n'; @@ -639,144 +645,142 @@ void explainTernaryOperationOneOutputError(Operation op, OS); OS << "Libc result: " << mpfrMatchValue.str() << '\n' - << "MPFR result: " << mpfrResult.str() << '\n'; + << "MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - "Libc floating point result bits: ", libcResult, OS); + "Libc floating point result bits: ", libc_result, OS); __llvm_libc::fputil::testing::describeValue( - " MPFR rounded bits: ", mpfrResult.as(), OS); - OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult)) << '\n'; + " MPFR rounded bits: ", mpfr_result.as(), OS); + OS << "ULP error: " << std::to_string(mpfr_result.ulp(libc_result)) << '\n'; } -template void explainTernaryOperationOneOutputError( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, float, 
testutils::StreamWrapper &); -template void explainTernaryOperationOneOutputError( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, double, testutils::StreamWrapper &); -template void explainTernaryOperationOneOutputError( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, long double, testutils::StreamWrapper &); template -bool compareUnaryOperationSingleOutput(Operation op, T input, T libcResult, - double ulpError) { +bool compare_unary_operation_single_output(Operation op, T input, T libc_result, + double ulp_error) { // If the ulp error is exactly 0.5 (i.e a tie), we would check that the result // is rounded to the nearest even. - MPFRNumber mpfrResult = unaryOperation(op, input); - double ulp = mpfrResult.ulp(libcResult); - bool bitsAreEven = ((FPBits(libcResult).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); + MPFRNumber mpfr_result = unary_operation(op, input); + double ulp = mpfr_result.ulp(libc_result); + bool bits_are_even = ((FPBits(libc_result).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool compareUnaryOperationSingleOutput(Operation, float, float, - double); -template bool compareUnaryOperationSingleOutput(Operation, double, - double, double); -template bool compareUnaryOperationSingleOutput(Operation, - long double, - long double, - double); +template bool compare_unary_operation_single_output(Operation, float, + float, double); +template bool compare_unary_operation_single_output(Operation, double, + double, double); +template bool compare_unary_operation_single_output(Operation, + long double, + long double, + double); template -bool compareUnaryOperationTwoOutputs(Operation op, T input, - const BinaryOutput &libcResult, - double ulpError) { +bool compare_unary_operation_two_outputs(Operation op, T input, + const BinaryOutput &libc_result, + double ulp_error) { int mpfrIntResult; - MPFRNumber mpfrResult = unaryOperationTwoOutputs(op, input, mpfrIntResult); - double ulp = mpfrResult.ulp(libcResult.f); + MPFRNumber mpfr_result = + unary_operation_two_outputs(op, input, mpfrIntResult); + double ulp = mpfr_result.ulp(libc_result.f); - if (mpfrIntResult != libcResult.i) + if (mpfrIntResult != libc_result.i) return false; - bool bitsAreEven = ((FPBits(libcResult.f).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); + bool bits_are_even = ((FPBits(libc_result.f).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } template bool -compareUnaryOperationTwoOutputs(Operation, float, - const BinaryOutput &, double); -template bool -compareUnaryOperationTwoOutputs(Operation, double, - const BinaryOutput &, double); -template bool compareUnaryOperationTwoOutputs( +compare_unary_operation_two_outputs(Operation, float, + const BinaryOutput &, double); +template bool compare_unary_operation_two_outputs( + Operation, double, const BinaryOutput &, double); +template bool compare_unary_operation_two_outputs( Operation, long double, const BinaryOutput &, double); template -bool compareBinaryOperationTwoOutputs(Operation op, const BinaryInput &input, - const BinaryOutput &libcResult, - double ulpError) { +bool compare_binary_operation_two_outputs(Operation op, + const BinaryInput &input, + const BinaryOutput &libc_result, + double ulp_error) { int mpfrIntResult; 
- MPFRNumber mpfrResult = - binaryOperationTwoOutputs(op, input.x, input.y, mpfrIntResult); - double ulp = mpfrResult.ulp(libcResult.f); + MPFRNumber mpfr_result = + binary_operation_two_outputs(op, input.x, input.y, mpfrIntResult); + double ulp = mpfr_result.ulp(libc_result.f); - if (mpfrIntResult != libcResult.i) { + if (mpfrIntResult != libc_result.i) { if (op == Operation::RemQuo) { - if ((0x7 & mpfrIntResult) != (0x7 & libcResult.i)) + if ((0x7 & mpfrIntResult) != (0x7 & libc_result.i)) return false; } else { return false; } } - bool bitsAreEven = ((FPBits(libcResult.f).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); + bool bits_are_even = ((FPBits(libc_result.f).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool -compareBinaryOperationTwoOutputs(Operation, const BinaryInput &, - const BinaryOutput &, double); -template bool -compareBinaryOperationTwoOutputs(Operation, const BinaryInput &, - const BinaryOutput &, double); -template bool compareBinaryOperationTwoOutputs( +template bool compare_binary_operation_two_outputs( + Operation, const BinaryInput &, const BinaryOutput &, double); +template bool compare_binary_operation_two_outputs( + Operation, const BinaryInput &, const BinaryOutput &, + double); +template bool compare_binary_operation_two_outputs( Operation, const BinaryInput &, const BinaryOutput &, double); template -bool compareBinaryOperationOneOutput(Operation op, const BinaryInput &input, - T libcResult, double ulpError) { - MPFRNumber mpfrResult = binaryOperationOneOutput(op, input.x, input.y); - double ulp = mpfrResult.ulp(libcResult); - - bool bitsAreEven = ((FPBits(libcResult).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); +bool compare_binary_operation_one_output(Operation op, + const BinaryInput &input, + T libc_result, double ulp_error) { + MPFRNumber mpfr_result = binary_operation_one_output(op, input.x, input.y); + double ulp = mpfr_result.ulp(libc_result); + + bool bits_are_even = ((FPBits(libc_result).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool compareBinaryOperationOneOutput(Operation, - const BinaryInput &, - float, double); -template bool -compareBinaryOperationOneOutput(Operation, const BinaryInput &, - double, double); -template bool compareBinaryOperationOneOutput( +template bool compare_binary_operation_one_output( + Operation, const BinaryInput &, float, double); +template bool compare_binary_operation_one_output( + Operation, const BinaryInput &, double, double); +template bool compare_binary_operation_one_output( Operation, const BinaryInput &, long double, double); template -bool compareTernaryOperationOneOutput(Operation op, - const TernaryInput &input, - T libcResult, double ulpError) { - MPFRNumber mpfrResult = - ternaryOperationOneOutput(op, input.x, input.y, input.z); - double ulp = mpfrResult.ulp(libcResult); - - bool bitsAreEven = ((FPBits(libcResult).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); +bool compare_ternary_operation_one_output(Operation op, + const TernaryInput &input, + T libc_result, double ulp_error) { + MPFRNumber mpfr_result = + ternary_operation_one_output(op, input.x, input.y, input.z); + double ulp = mpfr_result.ulp(libc_result); + + bool bits_are_even = ((FPBits(libc_result).uintval() & 
1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool -compareTernaryOperationOneOutput(Operation, const TernaryInput &, - float, double); -template bool compareTernaryOperationOneOutput( +template bool compare_ternary_operation_one_output( + Operation, const TernaryInput &, float, double); +template bool compare_ternary_operation_one_output( Operation, const TernaryInput &, double, double); -template bool compareTernaryOperationOneOutput( +template bool compare_ternary_operation_one_output( Operation, const TernaryInput &, long double, double); -static mpfr_rnd_t getMPFRRoundingMode(RoundingMode mode) { +static mpfr_rnd_t get_mpfr_rounding_mode(RoundingMode mode) { switch (mode) { case RoundingMode::Upward: return MPFR_RNDU; @@ -795,33 +799,33 @@ static mpfr_rnd_t getMPFRRoundingMode(RoundingMode mode) { } // namespace internal -template bool RoundToLong(T x, long &result) { +template bool round_to_long(T x, long &result) { MPFRNumber mpfr(x); - return mpfr.roundToLong(result); + return mpfr.roung_to_long(result); } -template bool RoundToLong(float, long &); -template bool RoundToLong(double, long &); -template bool RoundToLong(long double, long &); +template bool round_to_long(float, long &); +template bool round_to_long(double, long &); +template bool round_to_long(long double, long &); -template bool RoundToLong(T x, RoundingMode mode, long &result) { +template bool round_to_long(T x, RoundingMode mode, long &result) { MPFRNumber mpfr(x); - return mpfr.roundToLong(internal::getMPFRRoundingMode(mode), result); + return mpfr.roung_to_long(internal::get_mpfr_rounding_mode(mode), result); } -template bool RoundToLong(float, RoundingMode, long &); -template bool RoundToLong(double, RoundingMode, long &); -template bool RoundToLong(long double, RoundingMode, long &); +template bool round_to_long(float, RoundingMode, long &); +template bool round_to_long(double, RoundingMode, long &); +template bool round_to_long(long double, RoundingMode, long &); -template T Round(T x, RoundingMode mode) { +template T round(T x, RoundingMode mode) { MPFRNumber mpfr(x); - MPFRNumber result = mpfr.rint(internal::getMPFRRoundingMode(mode)); + MPFRNumber result = mpfr.rint(internal::get_mpfr_rounding_mode(mode)); return result.as(); } -template float Round(float, RoundingMode); -template double Round(double, RoundingMode); -template long double Round(long double, RoundingMode); +template float round(float, RoundingMode); +template double round(double, RoundingMode); +template long double round(long double, RoundingMode); } // namespace mpfr } // namespace testing diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index ff35a56231bb..c52f2a76d9c6 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -98,132 +98,137 @@ namespace internal { template struct AreMatchingBinaryInputAndBinaryOutput { - static constexpr bool value = false; + static constexpr bool VALUE = false; }; template struct AreMatchingBinaryInputAndBinaryOutput, BinaryOutput> { - static constexpr bool value = cpp::IsFloatingPointType::Value; + static constexpr bool VALUE = cpp::IsFloatingPointType::Value; }; template -bool compareUnaryOperationSingleOutput(Operation op, T input, T libcOutput, - double t); +bool compare_unary_operation_single_output(Operation op, T input, T libc_output, + double t); template -bool compareUnaryOperationTwoOutputs(Operation op, T input, - const BinaryOutput &libcOutput, - double t); 
+bool compare_unary_operation_two_outputs(Operation op, T input, + const BinaryOutput &libc_output, + double t); template -bool compareBinaryOperationTwoOutputs(Operation op, const BinaryInput &input, - const BinaryOutput &libcOutput, - double t); +bool compare_binary_operation_two_outputs(Operation op, + const BinaryInput &input, + const BinaryOutput &libc_output, + double t); template -bool compareBinaryOperationOneOutput(Operation op, const BinaryInput &input, - T libcOutput, double t); +bool compare_binary_operation_one_output(Operation op, + const BinaryInput &input, + T libc_output, double t); template -bool compareTernaryOperationOneOutput(Operation op, - const TernaryInput &input, - T libcOutput, double t); +bool compare_ternary_operation_one_output(Operation op, + const TernaryInput &input, + T libc_output, double t); template -void explainUnaryOperationSingleOutputError(Operation op, T input, T matchValue, - testutils::StreamWrapper &OS); +void explain_unary_operation_single_output_error(Operation op, T input, + T match_value, + testutils::StreamWrapper &OS); template -void explainUnaryOperationTwoOutputsError(Operation op, T input, - const BinaryOutput &matchValue, - testutils::StreamWrapper &OS); +void explain_unary_operation_two_outputs_error( + Operation op, T input, const BinaryOutput &match_value, + testutils::StreamWrapper &OS); template -void explainBinaryOperationTwoOutputsError(Operation op, - const BinaryInput &input, - const BinaryOutput &matchValue, - testutils::StreamWrapper &OS); +void explain_binary_operation_two_outputs_error( + Operation op, const BinaryInput &input, + const BinaryOutput &match_value, testutils::StreamWrapper &OS); template -void explainBinaryOperationOneOutputError(Operation op, - const BinaryInput &input, - T matchValue, - testutils::StreamWrapper &OS); +void explain_binary_operation_one_output_error(Operation op, + const BinaryInput &input, + T match_value, + testutils::StreamWrapper &OS); template -void explainTernaryOperationOneOutputError(Operation op, - const TernaryInput &input, - T matchValue, - testutils::StreamWrapper &OS); +void explain_ternary_operation_one_output_error(Operation op, + const TernaryInput &input, + T match_value, + testutils::StreamWrapper &OS); template class MPFRMatcher : public testing::Matcher { InputType input; - OutputType matchValue; - double ulpTolerance; + OutputType match_value; + double ulp_tolerance; public: - MPFRMatcher(InputType testInput, double ulpTolerance) - : input(testInput), ulpTolerance(ulpTolerance) {} + MPFRMatcher(InputType testInput, double ulp_tolerance) - : input(testInput), ulpTolerance(ulpTolerance) {} + : input(testInput), ulp_tolerance(ulp_tolerance) {} bool match(OutputType libcResult) { - matchValue = libcResult; - return match(input, matchValue, ulpTolerance); + match_value = libcResult; + return match(input, match_value, ulp_tolerance); } - void explainError(testutils::StreamWrapper &OS) override { - explainError(input, matchValue, OS); + // This method is marked with NOLINT because the name `explainError` + // does not conform to the coding style.
+ void explainError(testutils::StreamWrapper &OS) override { // NOLINT + explain_error(input, match_value, OS); } private: template static bool match(T in, T out, double tolerance) { - return compareUnaryOperationSingleOutput(op, in, out, tolerance); + return compare_unary_operation_single_output(op, in, out, tolerance); } template static bool match(T in, const BinaryOutput &out, double tolerance) { - return compareUnaryOperationTwoOutputs(op, in, out, tolerance); + return compare_unary_operation_two_outputs(op, in, out, tolerance); } template static bool match(const BinaryInput &in, T out, double tolerance) { - return compareBinaryOperationOneOutput(op, in, out, tolerance); + return compare_binary_operation_one_output(op, in, out, tolerance); } template static bool match(BinaryInput in, const BinaryOutput &out, double tolerance) { - return compareBinaryOperationTwoOutputs(op, in, out, tolerance); + return compare_binary_operation_two_outputs(op, in, out, tolerance); } template static bool match(const TernaryInput &in, T out, double tolerance) { - return compareTernaryOperationOneOutput(op, in, out, tolerance); + return compare_ternary_operation_one_output(op, in, out, tolerance); } template - static void explainError(T in, T out, testutils::StreamWrapper &OS) { - explainUnaryOperationSingleOutputError(op, in, out, OS); + static void explain_error(T in, T out, testutils::StreamWrapper &OS) { + explain_unary_operation_single_output_error(op, in, out, OS); } template - static void explainError(T in, const BinaryOutput &out, - testutils::StreamWrapper &OS) { - explainUnaryOperationTwoOutputsError(op, in, out, OS); + static void explain_error(T in, const BinaryOutput &out, + testutils::StreamWrapper &OS) { + explain_unary_operation_two_outputs_error(op, in, out, OS); } template - static void explainError(const BinaryInput &in, const BinaryOutput &out, - testutils::StreamWrapper &OS) { - explainBinaryOperationTwoOutputsError(op, in, out, OS); + static void explain_error(const BinaryInput &in, + const BinaryOutput &out, + testutils::StreamWrapper &OS) { + explain_binary_operation_two_outputs_error(op, in, out, OS); } template - static void explainError(const BinaryInput &in, T out, - testutils::StreamWrapper &OS) { - explainBinaryOperationOneOutputError(op, in, out, OS); + static void explain_error(const BinaryInput &in, T out, + testutils::StreamWrapper &OS) { + explain_binary_operation_one_output_error(op, in, out, OS); } template - static void explainError(const TernaryInput &in, T out, - testutils::StreamWrapper &OS) { - explainTernaryOperationOneOutputError(op, in, out, OS); + static void explain_error(const TernaryInput &in, T out, + testutils::StreamWrapper &OS) { + explain_ternary_operation_one_output_error(op, in, out, OS); } }; @@ -232,7 +237,7 @@ class MPFRMatcher : public testing::Matcher { // Return true if the input and output types for the operation op are valid // types.
template -constexpr bool isValidOperation() { +constexpr bool is_valid_operation() { return (Operation::BeginUnaryOperationsSingleOutput < op && op < Operation::EndUnaryOperationsSingleOutput && cpp::IsSame::Value && @@ -248,7 +253,7 @@ constexpr bool isValidOperation() { (Operation::BeginBinaryOperationsTwoOutputs < op && op < Operation::EndBinaryOperationsTwoOutputs && internal::AreMatchingBinaryInputAndBinaryOutput::value) || + OutputType>::VALUE) || (Operation::BeginTernaryOperationsSingleOuput < op && op < Operation::EndTernaryOperationsSingleOutput && cpp::IsFloatingPointType::Value && @@ -257,29 +262,29 @@ constexpr bool isValidOperation() { template __attribute__((no_sanitize("address"))) -cpp::EnableIfType(), +cpp::EnableIfType(), internal::MPFRMatcher> -getMPFRMatcher(InputType input, OutputType outputUnused, double t) { +get_mpfr_matcher(InputType input, OutputType output_unused, double t) { return internal::MPFRMatcher(input, t); } enum class RoundingMode : uint8_t { Upward, Downward, TowardZero, Nearest }; -template T Round(T x, RoundingMode mode); +template T round(T x, RoundingMode mode); -template bool RoundToLong(T x, long &result); -template bool RoundToLong(T x, RoundingMode mode, long &result); +template bool round_to_long(T x, long &result); +template bool round_to_long(T x, RoundingMode mode, long &result); } // namespace mpfr } // namespace testing } // namespace __llvm_libc -#define EXPECT_MPFR_MATCH(op, input, matchValue, tolerance) \ - EXPECT_THAT(matchValue, __llvm_libc::testing::mpfr::getMPFRMatcher( \ - input, matchValue, tolerance)) +#define EXPECT_MPFR_MATCH(op, input, match_value, tolerance) \ + EXPECT_THAT(match_value, __llvm_libc::testing::mpfr::get_mpfr_matcher( \ + input, match_value, tolerance)) -#define ASSERT_MPFR_MATCH(op, input, matchValue, tolerance) \ - ASSERT_THAT(matchValue, __llvm_libc::testing::mpfr::getMPFRMatcher( \ - input, matchValue, tolerance)) +#define ASSERT_MPFR_MATCH(op, input, match_value, tolerance) \ + ASSERT_THAT(match_value, __llvm_libc::testing::mpfr::get_mpfr_matcher( \ + input, match_value, tolerance)) #endif // LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H diff --git a/libc/utils/UnitTest/LibcTest.cpp b/libc/utils/UnitTest/LibcTest.cpp index 710ee15bba3f..c998337039a6 100644 --- a/libc/utils/UnitTest/LibcTest.cpp +++ b/libc/utils/UnitTest/LibcTest.cpp @@ -291,22 +291,22 @@ bool Test::testMatch(bool MatchResult, MatcherBase &Matcher, const char *LHSStr, bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, const char *LHSStr, const char *RHSStr, const char *File, unsigned long Line) { - testutils::ProcessStatus Result = testutils::invokeInSubprocess(Func, 500); + testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 500); - if (const char *error = Result.getError()) { + if (const char *error = Result.get_error()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << error << '\n'; return false; } - if (Result.timedOut()) { + if (Result.timed_out()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Process timed out after " << 500 << " milliseconds.\n"; return false; } - if (Result.exitedNormally()) { + if (Result.exited_normally()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Expected " << LHSStr @@ -314,41 +314,41 @@ bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, return false; } - int KilledBy = Result.getFatalSignal(); + int KilledBy = Result.get_fatal_signal(); assert(KilledBy != 0 && "Not killed by 
any signal"); if (Signal == -1 || KilledBy == Signal) return true; - using testutils::signalAsString; + using testutils::signal_as_string; Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << " Expected: " << LHSStr << '\n' << "To be killed by signal: " << Signal << '\n' - << " Which is: " << signalAsString(Signal) << '\n' + << " Which is: " << signal_as_string(Signal) << '\n' << " But it was killed by: " << KilledBy << '\n' - << " Which is: " << signalAsString(KilledBy) << '\n'; + << " Which is: " << signal_as_string(KilledBy) << '\n'; return false; } bool Test::testProcessExits(testutils::FunctionCaller *Func, int ExitCode, const char *LHSStr, const char *RHSStr, const char *File, unsigned long Line) { - testutils::ProcessStatus Result = testutils::invokeInSubprocess(Func, 500); + testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 500); - if (const char *error = Result.getError()) { + if (const char *error = Result.get_error()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << error << '\n'; return false; } - if (Result.timedOut()) { + if (Result.timed_out()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Process timed out after " << 500 << " milliseconds.\n"; return false; } - if (!Result.exitedNormally()) { + if (!Result.exited_normally()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Expected " << LHSStr << '\n' @@ -357,7 +357,7 @@ bool Test::testProcessExits(testutils::FunctionCaller *Func, int ExitCode, return false; } - int ActualExit = Result.getExitCode(); + int ActualExit = Result.get_exit_code(); if (ActualExit == ExitCode) return true; diff --git a/libc/utils/testutils/ExecuteFunction.h b/libc/utils/testutils/ExecuteFunction.h index 32482ebaf3bf..a22336575d82 100644 --- a/libc/utils/testutils/ExecuteFunction.h +++ b/libc/utils/testutils/ExecuteFunction.h @@ -21,28 +21,29 @@ class FunctionCaller { }; struct ProcessStatus { - int PlatformDefined; + int platform_defined; const char *failure = nullptr; - static constexpr uintptr_t timeout = -1L; + static constexpr uintptr_t TIMEOUT = -1L; - static ProcessStatus Error(const char *error) { return {0, error}; } - static ProcessStatus TimedOut() { - return {0, reinterpret_cast(timeout)}; + static ProcessStatus error(const char *error) { return {0, error}; } + static ProcessStatus timed_out_ps() { + return {0, reinterpret_cast(TIMEOUT)}; } - bool timedOut() const { - return failure == reinterpret_cast(timeout); + bool timed_out() const { + return failure == reinterpret_cast(TIMEOUT); } - const char *getError() const { return timedOut() ? nullptr : failure; } - bool exitedNormally() const; - int getExitCode() const; - int getFatalSignal() const; + const char *get_error() const { return timed_out() ? 
nullptr : failure; } + bool exited_normally() const; + int get_exit_code() const; + int get_fatal_signal() const; }; -ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned TimeoutMS = -1); +ProcessStatus invoke_in_subprocess(FunctionCaller *Func, + unsigned TimeoutMS = -1); -const char *signalAsString(int Signum); +const char *signal_as_string(int Signum); } // namespace testutils } // namespace __llvm_libc diff --git a/libc/utils/testutils/ExecuteFunctionUnix.cpp b/libc/utils/testutils/ExecuteFunctionUnix.cpp index 4050a0ca8a6e..a3871d07386d 100644 --- a/libc/utils/testutils/ExecuteFunctionUnix.cpp +++ b/libc/utils/testutils/ExecuteFunctionUnix.cpp @@ -20,33 +20,33 @@ namespace __llvm_libc { namespace testutils { -bool ProcessStatus::exitedNormally() const { - return WIFEXITED(PlatformDefined); +bool ProcessStatus::exited_normally() const { + return WIFEXITED(platform_defined); } -int ProcessStatus::getExitCode() const { - assert(exitedNormally() && "Abnormal termination, no exit code"); - return WEXITSTATUS(PlatformDefined); +int ProcessStatus::get_exit_code() const { + assert(exited_normally() && "Abnormal termination, no exit code"); + return WEXITSTATUS(platform_defined); } -int ProcessStatus::getFatalSignal() const { - if (exitedNormally()) +int ProcessStatus::get_fatal_signal() const { + if (exited_normally()) return 0; - return WTERMSIG(PlatformDefined); + return WTERMSIG(platform_defined); } -ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned timeoutMS) { +ProcessStatus invoke_in_subprocess(FunctionCaller *Func, unsigned timeoutMS) { std::unique_ptr X(Func); int pipeFDs[2]; if (::pipe(pipeFDs) == -1) - return ProcessStatus::Error("pipe(2) failed"); + return ProcessStatus::error("pipe(2) failed"); // Don't copy the buffers into the child process and print twice. std::cout.flush(); std::cerr.flush(); pid_t Pid = ::fork(); if (Pid == -1) - return ProcessStatus::Error("fork(2) failed"); + return ProcessStatus::error("fork(2) failed"); if (!Pid) { (*Func)(); @@ -60,11 +60,11 @@ ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned timeoutMS) { // No events requested so this call will only return after the timeout or if // the pipes peer was closed, signaling the process exited. if (::poll(&pollFD, 1, timeoutMS) == -1) - return ProcessStatus::Error("poll(2) failed"); + return ProcessStatus::error("poll(2) failed"); // If the pipe wasn't closed by the child yet then timeout has expired. if (!(pollFD.revents & POLLHUP)) { ::kill(Pid, SIGKILL); - return ProcessStatus::TimedOut(); + return ProcessStatus::timed_out_ps(); } int WStatus = 0; @@ -72,13 +72,13 @@ ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned timeoutMS) { // and doesn't turn into a zombie. 
pid_t status = ::waitpid(Pid, &WStatus, 0); if (status == -1) - return ProcessStatus::Error("waitpid(2) failed"); + return ProcessStatus::error("waitpid(2) failed"); assert(status == Pid); (void)status; return {WStatus}; } -const char *signalAsString(int Signum) { return ::strsignal(Signum); } +const char *signal_as_string(int Signum) { return ::strsignal(Signum); } } // namespace testutils } // namespace __llvm_libc diff --git a/libc/utils/testutils/FDReader.h b/libc/utils/testutils/FDReader.h index e25dcea290a6..8a39a92dcc33 100644 --- a/libc/utils/testutils/FDReader.h +++ b/libc/utils/testutils/FDReader.h @@ -19,8 +19,8 @@ class FDReader { FDReader(); ~FDReader(); - int getWriteFD() { return pipefd[1]; } - bool matchWritten(const char *); + int get_write_fd() { return pipefd[1]; } + bool match_written(const char *); }; } // namespace testutils diff --git a/libc/utils/testutils/FDReaderUnix.cpp b/libc/utils/testutils/FDReaderUnix.cpp index db1f63f0dcc3..be6cde331f37 100644 --- a/libc/utils/testutils/FDReaderUnix.cpp +++ b/libc/utils/testutils/FDReaderUnix.cpp @@ -27,7 +27,7 @@ FDReader::~FDReader() { ::close(pipefd[1]); } -bool FDReader::matchWritten(const char *str) { +bool FDReader::match_written(const char *str) { ::close(pipefd[1]); diff --git a/libc/utils/testutils/StreamWrapper.cpp b/libc/utils/testutils/StreamWrapper.cpp index 84ab583f6644..f65c9c3055ce 100644 --- a/libc/utils/testutils/StreamWrapper.cpp +++ b/libc/utils/testutils/StreamWrapper.cpp @@ -19,8 +19,8 @@ namespace testutils { StreamWrapper outs() { return {std::addressof(std::cout)}; } template StreamWrapper &StreamWrapper::operator<<(T t) { - assert(OS); - std::ostream &Stream = *reinterpret_cast(OS); + assert(os); + std::ostream &Stream = *reinterpret_cast(os); Stream << t; Stream.flush(); return *this; @@ -52,7 +52,7 @@ OutputFileStream::OutputFileStream(const char *FN) : StreamWrapper(new std::ofstream(FN)) {} OutputFileStream::~OutputFileStream() { - delete reinterpret_cast(OS); + delete reinterpret_cast(os); } } // namespace testutils diff --git a/libc/utils/testutils/StreamWrapper.h b/libc/utils/testutils/StreamWrapper.h index 2434ab06bb71..f4309abdedef 100644 --- a/libc/utils/testutils/StreamWrapper.h +++ b/libc/utils/testutils/StreamWrapper.h @@ -17,10 +17,10 @@ namespace testutils { // expose the system libc headers. class StreamWrapper { protected: - void *OS; + void *os; public: - StreamWrapper(void *OS) : OS(OS) {} + StreamWrapper(void *OS) : os(OS) {} template StreamWrapper &operator<<(T t); }; diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 39744bb21559..03a6a0781b5a 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -196,6 +196,7 @@ endif() option(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT "Enable per TU ABI insulation by default. To be used by vendors." OFF) set(LIBCXX_ABI_DEFINES "" CACHE STRING "A semicolon separated list of ABI macros to define in the site config header.") +option(LIBCXX_EXTRA_SITE_DEFINES "Extra defines to add into __config_site") option(LIBCXX_USE_COMPILER_RT "Use compiler-rt instead of libgcc" OFF) set(LIBCXX_LIBCPPABI_VERSION "2" CACHE STRING "Version of libc++abi's ABI to re-export from libc++ when re-exporting is enabled. 
Note that this is not related to the version of libc++'s ABI itself!") @@ -909,6 +910,17 @@ if (LIBCXX_ABI_DEFINES) config_define(${abi_defines} _LIBCPP_ABI_DEFINES) endif() +if (LIBCXX_EXTRA_SITE_DEFINES) + set(extra_site_defines) + foreach (extra_site_define ${LIBCXX_EXTRA_SITE_DEFINES}) + # Allow defines such as DEFINE=VAL, transformed into "#define DEFINE VAL". + string(REPLACE "=" " " extra_site_define "${extra_site_define}") + list(APPEND extra_site_defines "#define ${extra_site_define}") + endforeach() + string(REPLACE ";" "\n" extra_site_defines "${extra_site_defines}") + config_define(${extra_site_defines} _LIBCPP_EXTRA_SITE_DEFINES) +endif() + # By default libc++ on Windows expects to use a shared library, which requires # the headers to use DLL import/export semantics. However when building a # static library only we modify the headers to disable DLL import/export. diff --git a/libcxx/cmake/caches/MinGW.cmake b/libcxx/cmake/caches/MinGW.cmake index 458eb012ad15..b9ac28006eb8 100644 --- a/libcxx/cmake/caches/MinGW.cmake +++ b/libcxx/cmake/caches/MinGW.cmake @@ -13,8 +13,6 @@ set(LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "") set(LIBCXX_TARGET_INFO "libcxx.test.target_info.MingwLocalTI" CACHE STRING "") -# Without these flags, 'long double' (which is 80 bit on x86 mingw, but +# Without this flag, 'long double' (which is 80 bit on x86 mingw, but # 64 bit in MSVC) isn't handled correctly in printf. -set(CMAKE_C_FLAGS "-D__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") -set(CMAKE_CXX_FLAGS "-D__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") -set(LIBCXX_TEST_COMPILER_FLAGS "-D__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") +set(LIBCXX_EXTRA_SITE_DEFINES "__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv index 8a5b75928e74..7a76e1f57645 100644 --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -25,7 +25,7 @@ "","","","","","" "`P0288R9 `__","LWG","``any_invocable``","October 2021","","" "`P0798R8 `__","LWG","Monadic operations for ``std::optional``","October 2021","|Complete|","14.0" -"`P0849R8 `__","LWG","``auto(x)``: ``DECAY_COPY`` in the language","October 2021","","" +"`P0849R8 `__","LWG","``auto(x)``: ``DECAY_COPY`` in the language","October 2021","|Complete|","14.0" "`P1072R10 `__","LWG","``basic_string::resize_and_overwrite``","October 2021","","" "`P1147R1 `__","LWG","Printing ``volatile`` Pointers","October 2021","|Complete|","14.0" "`P1272R4 `__","LWG","Byteswapping for fun&&nuf","October 2021","|Complete|","14.0" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index f7e459b59179..180f7d4259cd 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -141,6 +141,22 @@ set(files __coroutine/trivial_awaitables.h __debug __errc + __filesystem/copy_options.h + __filesystem/directory_entry.h + __filesystem/directory_iterator.h + __filesystem/directory_options.h + __filesystem/file_status.h + __filesystem/file_time_type.h + __filesystem/file_type.h + __filesystem/filesystem_error.h + __filesystem/operations.h + __filesystem/path_iterator.h + __filesystem/path.h + __filesystem/perm_options.h + __filesystem/perms.h + __filesystem/recursive_directory_iterator.h + __filesystem/space_info.h + __filesystem/u8path.h __format/format_arg.h __format/format_args.h __format/format_context.h @@ -344,8 +360,8 @@ set(files __tuple __undef_macros __utility/as_const.h + __utility/auto_cast.h __utility/cmp.h - __utility/decay_copy.h __utility/declval.h 
__utility/exchange.h __utility/forward.h diff --git a/libcxx/include/__config b/libcxx/include/__config index 84ce728ac3a0..720e12eac0dd 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -333,18 +333,43 @@ # define _LIBCPP_SHORT_WCHAR 1 #endif +// Libc++ supports various implementations of std::random_device. +// +// _LIBCPP_USING_DEV_RANDOM +// Read entropy from the given file, by default `/dev/urandom`. +// If a token is provided, it is assumed to be the path to a file +// to read entropy from. This is the default behavior if nothing +// else is specified. This implementation requires storing state +// inside `std::random_device`. +// +// _LIBCPP_USING_ARC4_RANDOM +// Use arc4random(). This allows obtaining random data even when +// using sandboxing mechanisms. On some platforms like Apple, this +// is the recommended source of entropy for user-space programs. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_GETENTROPY +// Use getentropy(). +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_NACL_RANDOM +// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, +// including accesses to the special files under `/dev`. This implementation +// uses the NaCL syscall `nacl_secure_random_init()` to get entropy. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_WIN32_RANDOM +// Use rand_s(), for use on Windows. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. #if defined(__OpenBSD__) - // Certain architectures provide arc4random(). Prefer using - // arc4random() over /dev/{u,}random to make it possible to obtain - // random data even when using sandboxing mechanisms such as chroots, - // Capsicum, etc. # define _LIBCPP_USING_ARC4_RANDOM #elif defined(__Fuchsia__) || defined(__wasi__) # define _LIBCPP_USING_GETENTROPY #elif defined(__native_client__) - // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, - // including accesses to the special files under /dev. C++11's - // std::random_device is instead exposed through a NaCl syscall. # define _LIBCPP_USING_NACL_RANDOM #elif defined(_LIBCPP_WIN32API) # define _LIBCPP_USING_WIN32_RANDOM diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in index a8b17ad72b73..59ca22981242 100644 --- a/libcxx/include/__config_site.in +++ b/libcxx/include/__config_site.in @@ -35,5 +35,6 @@ #cmakedefine _LIBCPP_HAS_NO_INCOMPLETE_RANGES @_LIBCPP_ABI_DEFINES@ +@_LIBCPP_EXTRA_SITE_DEFINES@ #endif // _LIBCPP_CONFIG_SITE diff --git a/libcxx/include/__filesystem/copy_options.h b/libcxx/include/__filesystem/copy_options.h new file mode 100644 index 000000000000..c0140d45717b --- /dev/null +++ b/libcxx/include/__filesystem/copy_options.h @@ -0,0 +1,80 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_COPY_OPTIONS_H +#define _LIBCPP___FILESYSTEM_COPY_OPTIONS_H + +#include <__availability> +#include <__config> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +enum class _LIBCPP_ENUM_VIS copy_options : unsigned short { + none = 0, + skip_existing = 1, + overwrite_existing = 2, + update_existing = 4, + recursive = 8, + copy_symlinks = 16, + skip_symlinks = 32, + directories_only = 64, + create_symlinks = 128, + create_hard_links = 256, + __in_recursive_copy = 512, +}; + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator&(copy_options _LHS, copy_options _RHS) { + return static_cast(static_cast(_LHS) & + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator|(copy_options _LHS, copy_options _RHS) { + return static_cast(static_cast(_LHS) | + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator^(copy_options _LHS, copy_options _RHS) { + return static_cast(static_cast(_LHS) ^ + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator~(copy_options _LHS) { + return static_cast(~static_cast(_LHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline copy_options& operator&=(copy_options& _LHS, copy_options _RHS) { + return _LHS = _LHS & _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) { + return _LHS = _LHS | _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) { + return _LHS = _LHS ^ _RHS; +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_COPY_OPTIONS_H diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h new file mode 100644 index 000000000000..9efe19465428 --- /dev/null +++ b/libcxx/include/__filesystem/directory_entry.h @@ -0,0 +1,504 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H +#define _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include <__filesystem/file_time_type.h> +#include <__filesystem/filesystem_error.h> +#include <__filesystem/file_status.h> +#include <__filesystem/file_type.h> +#include <__filesystem/operations.h> +#include <__filesystem/perms.h> +#include <__errc> +#include +#include +#include +#include + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + + +class directory_entry { + typedef _VSTD_FS::path _Path; + +public: + // constructors and destructors + directory_entry() noexcept = default; + directory_entry(directory_entry const&) = default; + directory_entry(directory_entry&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + explicit directory_entry(_Path const& __p) : __p_(__p) { + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + directory_entry(_Path const& __p, error_code& __ec) : __p_(__p) { + __refresh(&__ec); + } + + ~directory_entry() {} + + directory_entry& operator=(directory_entry const&) = default; + directory_entry& operator=(directory_entry&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + void assign(_Path const& __p) { + __p_ = __p; + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void assign(_Path const& __p, error_code& __ec) { + __p_ = __p; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void replace_filename(_Path const& __p) { + __p_.replace_filename(__p); + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void replace_filename(_Path const& __p, error_code& __ec) { + __p_ = __p_.parent_path() / __p; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void refresh() { __refresh(); } + + _LIBCPP_INLINE_VISIBILITY + void refresh(error_code& __ec) noexcept { __refresh(&__ec); } + + _LIBCPP_INLINE_VISIBILITY + _Path const& path() const noexcept { return __p_; } + + _LIBCPP_INLINE_VISIBILITY + operator const _Path&() const noexcept { return __p_; } + + _LIBCPP_INLINE_VISIBILITY + bool exists() const { return _VSTD_FS::exists(file_status{__get_ft()}); } + + _LIBCPP_INLINE_VISIBILITY + bool exists(error_code& __ec) const noexcept { + return _VSTD_FS::exists(file_status{__get_ft(&__ec)}); + } + + _LIBCPP_INLINE_VISIBILITY + bool is_block_file() const { return __get_ft() == file_type::block; } + + _LIBCPP_INLINE_VISIBILITY + bool is_block_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::block; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_character_file() const { return __get_ft() == file_type::character; } + + _LIBCPP_INLINE_VISIBILITY + bool is_character_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::character; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_directory() const { return __get_ft() == file_type::directory; } + + _LIBCPP_INLINE_VISIBILITY + bool is_directory(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::directory; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_fifo() const { return __get_ft() == file_type::fifo; } + + _LIBCPP_INLINE_VISIBILITY + bool is_fifo(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::fifo; + } + + _LIBCPP_INLINE_VISIBILITY + 
bool is_other() const { return _VSTD_FS::is_other(file_status{__get_ft()}); } + + _LIBCPP_INLINE_VISIBILITY + bool is_other(error_code& __ec) const noexcept { + return _VSTD_FS::is_other(file_status{__get_ft(&__ec)}); + } + + _LIBCPP_INLINE_VISIBILITY + bool is_regular_file() const { return __get_ft() == file_type::regular; } + + _LIBCPP_INLINE_VISIBILITY + bool is_regular_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::regular; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_socket() const { return __get_ft() == file_type::socket; } + + _LIBCPP_INLINE_VISIBILITY + bool is_socket(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::socket; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } + + _LIBCPP_INLINE_VISIBILITY + bool is_symlink(error_code& __ec) const noexcept { + return __get_sym_ft(&__ec) == file_type::symlink; + } + _LIBCPP_INLINE_VISIBILITY + uintmax_t file_size() const { return __get_size(); } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t file_size(error_code& __ec) const noexcept { + return __get_size(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t hard_link_count() const { return __get_nlink(); } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t hard_link_count(error_code& __ec) const noexcept { + return __get_nlink(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_time_type last_write_time() const { return __get_write_time(); } + + _LIBCPP_INLINE_VISIBILITY + file_time_type last_write_time(error_code& __ec) const noexcept { + return __get_write_time(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_status status() const { return __get_status(); } + + _LIBCPP_INLINE_VISIBILITY + file_status status(error_code& __ec) const noexcept { + return __get_status(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_status symlink_status() const { return __get_symlink_status(); } + + _LIBCPP_INLINE_VISIBILITY + file_status symlink_status(error_code& __ec) const noexcept { + return __get_symlink_status(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + bool operator<(directory_entry const& __rhs) const noexcept { + return __p_ < __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator==(directory_entry const& __rhs) const noexcept { + return __p_ == __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator!=(directory_entry const& __rhs) const noexcept { + return __p_ != __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator<=(directory_entry const& __rhs) const noexcept { + return __p_ <= __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator>(directory_entry const& __rhs) const noexcept { + return __p_ > __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator>=(directory_entry const& __rhs) const noexcept { + return __p_ >= __rhs.__p_; + } + +private: + friend class directory_iterator; + friend class recursive_directory_iterator; + friend class __dir_stream; + + enum _CacheType : unsigned char { + _Empty, + _IterSymlink, + _IterNonSymlink, + _RefreshSymlink, + _RefreshSymlinkUnresolved, + _RefreshNonSymlink + }; + + struct __cached_data { + uintmax_t __size_; + uintmax_t __nlink_; + file_time_type __write_time_; + perms __sym_perms_; + perms __non_sym_perms_; + file_type __type_; + _CacheType __cache_type_; + + _LIBCPP_INLINE_VISIBILITY + __cached_data() noexcept { __reset(); } + + _LIBCPP_INLINE_VISIBILITY + void __reset() { + __cache_type_ = _Empty; + __type_ = file_type::none; + __sym_perms_ = __non_sym_perms_ = perms::unknown; + __size_ = __nlink_ = uintmax_t(-1); + 
__write_time_ = file_time_type::min(); + } + }; + + _LIBCPP_INLINE_VISIBILITY + static __cached_data __create_iter_result(file_type __ft) { + __cached_data __data; + __data.__type_ = __ft; + __data.__cache_type_ = [&]() { + switch (__ft) { + case file_type::none: + return _Empty; + case file_type::symlink: + return _IterSymlink; + default: + return _IterNonSymlink; + } + }(); + return __data; + } + + _LIBCPP_INLINE_VISIBILITY + void __assign_iter_entry(_Path&& __p, __cached_data __dt) { + __p_ = _VSTD::move(__p); + __data_ = __dt; + } + + _LIBCPP_FUNC_VIS + error_code __do_refresh() noexcept; + + _LIBCPP_INLINE_VISIBILITY + static bool __is_dne_error(error_code const& __ec) { + if (!__ec) + return true; + switch (static_cast(__ec.value())) { + case errc::no_such_file_or_directory: + case errc::not_a_directory: + return true; + default: + return false; + } + } + + _LIBCPP_INLINE_VISIBILITY + void __handle_error(const char* __msg, error_code* __dest_ec, + error_code const& __ec, bool __allow_dne = false) const { + if (__dest_ec) { + *__dest_ec = __ec; + return; + } + if (__ec && (!__allow_dne || !__is_dne_error(__ec))) + __throw_filesystem_error(__msg, __p_, __ec); + } + + _LIBCPP_INLINE_VISIBILITY + void __refresh(error_code* __ec = nullptr) { + __handle_error("in directory_entry::refresh", __ec, __do_refresh(), + /*allow_dne*/ true); + } + + _LIBCPP_INLINE_VISIBILITY + file_type __get_sym_ft(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + return __symlink_status(__p_, __ec).type(); + case _IterSymlink: + case _RefreshSymlink: + case _RefreshSymlinkUnresolved: + if (__ec) + __ec->clear(); + return file_type::symlink; + case _IterNonSymlink: + case _RefreshNonSymlink: + file_status __st(__data_.__type_); + if (__ec && !_VSTD_FS::exists(__st)) + *__ec = make_error_code(errc::no_such_file_or_directory); + else if (__ec) + __ec->clear(); + return __data_.__type_; + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_type __get_ft(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return __status(__p_, __ec).type(); + case _IterNonSymlink: + case _RefreshNonSymlink: + case _RefreshSymlink: { + file_status __st(__data_.__type_); + if (__ec && !_VSTD_FS::exists(__st)) + *__ec = make_error_code(errc::no_such_file_or_directory); + else if (__ec) + __ec->clear(); + return __data_.__type_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_status __get_status(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return __status(__p_, __ec); + case _RefreshNonSymlink: + case _RefreshSymlink: + return file_status(__get_ft(__ec), __data_.__non_sym_perms_); + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_status __get_symlink_status(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + return __symlink_status(__p_, __ec); + case _RefreshNonSymlink: + return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_); + case _RefreshSymlink: + case _RefreshSymlinkUnresolved: + return file_status(__get_sym_ft(__ec), __data_.__sym_perms_); + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t __get_size(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + 
case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__file_size(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + file_status __st(__get_ft(&__m_ec)); + __handle_error("in directory_entry::file_size", __ec, __m_ec); + if (_VSTD_FS::exists(__st) && !_VSTD_FS::is_regular_file(__st)) { + errc __err_kind = _VSTD_FS::is_directory(__st) ? errc::is_a_directory + : errc::not_supported; + __handle_error("in directory_entry::file_size", __ec, + make_error_code(__err_kind)); + } + return __data_.__size_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t __get_nlink(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__hard_link_count(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + (void)__get_ft(&__m_ec); + __handle_error("in directory_entry::hard_link_count", __ec, __m_ec); + return __data_.__nlink_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_time_type __get_write_time(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__last_write_time(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + file_status __st(__get_ft(&__m_ec)); + __handle_error("in directory_entry::last_write_time", __ec, __m_ec); + if (_VSTD_FS::exists(__st) && + __data_.__write_time_ == file_time_type::min()) + __handle_error("in directory_entry::last_write_time", __ec, + make_error_code(errc::value_too_large)); + return __data_.__write_time_; + } + } + _LIBCPP_UNREACHABLE(); + } + +private: + _Path __p_; + __cached_data __data_; +}; + +class __dir_element_proxy { +public: + inline _LIBCPP_INLINE_VISIBILITY directory_entry operator*() { + return _VSTD::move(__elem_); + } + +private: + friend class directory_iterator; + friend class recursive_directory_iterator; + explicit __dir_element_proxy(directory_entry const& __e) : __elem_(__e) {} + __dir_element_proxy(__dir_element_proxy&& __o) + : __elem_(_VSTD::move(__o.__elem_)) {} + directory_entry __elem_; +}; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h new file mode 100644 index 000000000000..be958e0eb8de --- /dev/null +++ b/libcxx/include/__filesystem/directory_iterator.h @@ -0,0 +1,150 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H +#define _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H + +#include <__availability> +#include <__config> +#include <__filesystem/directory_entry.h> +#include <__filesystem/directory_options.h> +#include <__filesystem/path.h> +#include <__iterator/iterator_traits.h> +#include <__memory/shared_ptr.h> +#include <__debug> +#include <__ranges/enable_borrowed_range.h> +#include <__ranges/enable_view.h> +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class _LIBCPP_HIDDEN __dir_stream; +class directory_iterator { +public: + typedef directory_entry value_type; + typedef ptrdiff_t difference_type; + typedef value_type const* pointer; + typedef value_type const& reference; + typedef input_iterator_tag iterator_category; + +public: + //ctor & dtor + directory_iterator() noexcept {} + + explicit directory_iterator(const path& __p) + : directory_iterator(__p, nullptr) {} + + directory_iterator(const path& __p, directory_options __opts) + : directory_iterator(__p, nullptr, __opts) {} + + directory_iterator(const path& __p, error_code& __ec) + : directory_iterator(__p, &__ec) {} + + directory_iterator(const path& __p, directory_options __opts, + error_code& __ec) + : directory_iterator(__p, &__ec, __opts) {} + + directory_iterator(const directory_iterator&) = default; + directory_iterator(directory_iterator&&) = default; + directory_iterator& operator=(const directory_iterator&) = default; + + directory_iterator& operator=(directory_iterator&& __o) noexcept { + // non-default implementation provided to support self-move assign. 
+ if (this != &__o) { + __imp_ = _VSTD::move(__o.__imp_); + } + return *this; + } + + ~directory_iterator() = default; + + const directory_entry& operator*() const { + _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); + return __dereference(); + } + + const directory_entry* operator->() const { return &**this; } + + directory_iterator& operator++() { return __increment(); } + + __dir_element_proxy operator++(int) { + __dir_element_proxy __p(**this); + __increment(); + return __p; + } + + directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } + +private: + inline _LIBCPP_INLINE_VISIBILITY friend bool + operator==(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept; + + // construct the dir_stream + _LIBCPP_FUNC_VIS + directory_iterator(const path&, error_code*, + directory_options = directory_options::none); + + _LIBCPP_FUNC_VIS + directory_iterator& __increment(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + const directory_entry& __dereference() const; + +private: + shared_ptr<__dir_stream> __imp_; +}; + +inline _LIBCPP_INLINE_VISIBILITY bool +operator==(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept { + return __lhs.__imp_ == __rhs.__imp_; +} + +inline _LIBCPP_INLINE_VISIBILITY bool +operator!=(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept { + return !(__lhs == __rhs); +} + +// enable directory_iterator range-based for statements +inline _LIBCPP_INLINE_VISIBILITY directory_iterator +begin(directory_iterator __iter) noexcept { + return __iter; +} + +inline _LIBCPP_INLINE_VISIBILITY directory_iterator +end(directory_iterator) noexcept { + return directory_iterator(); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#if !defined(_LIBCPP_HAS_NO_RANGES) + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true; + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true; + +#endif + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H diff --git a/libcxx/include/__filesystem/directory_options.h b/libcxx/include/__filesystem/directory_options.h new file mode 100644 index 000000000000..79c0c2cbaa55 --- /dev/null +++ b/libcxx/include/__filesystem/directory_options.h @@ -0,0 +1,78 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H +#define _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H + +#include <__availability> +#include <__config> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +enum class _LIBCPP_ENUM_VIS directory_options : unsigned char { + none = 0, + follow_directory_symlink = 1, + skip_permission_denied = 2 +}; + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator&(directory_options _LHS, + directory_options _RHS) { + return static_cast(static_cast(_LHS) & + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator|(directory_options _LHS, + directory_options _RHS) { + return static_cast(static_cast(_LHS) | + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator^(directory_options _LHS, + directory_options _RHS) { + return static_cast(static_cast(_LHS) ^ + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator~(directory_options _LHS) { + return static_cast(~static_cast(_LHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline directory_options& operator&=(directory_options& _LHS, + directory_options _RHS) { + return _LHS = _LHS & _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline directory_options& operator|=(directory_options& _LHS, + directory_options _RHS) { + return _LHS = _LHS | _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline directory_options& operator^=(directory_options& _LHS, + directory_options _RHS) { + return _LHS = _LHS ^ _RHS; +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H diff --git a/libcxx/include/__filesystem/file_status.h b/libcxx/include/__filesystem/file_status.h new file mode 100644 index 000000000000..a8f653ab44fc --- /dev/null +++ b/libcxx/include/__filesystem/file_status.h @@ -0,0 +1,68 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILE_STATUS_H +#define _LIBCPP___FILESYSTEM_FILE_STATUS_H + +#include <__availability> +#include <__config> +#include <__filesystem/file_type.h> +#include <__filesystem/perms.h> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class _LIBCPP_TYPE_VIS file_status { +public: + // constructors + _LIBCPP_INLINE_VISIBILITY + file_status() noexcept : file_status(file_type::none) {} + _LIBCPP_INLINE_VISIBILITY + explicit file_status(file_type __ft, perms __prms = perms::unknown) noexcept + : __ft_(__ft), + __prms_(__prms) {} + + file_status(const file_status&) noexcept = default; + file_status(file_status&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + ~file_status() {} + + file_status& operator=(const file_status&) noexcept = default; + file_status& operator=(file_status&&) noexcept = default; + + // observers + _LIBCPP_INLINE_VISIBILITY + file_type type() const noexcept { return __ft_; } + + _LIBCPP_INLINE_VISIBILITY + perms permissions() const noexcept { return __prms_; } + + // modifiers + _LIBCPP_INLINE_VISIBILITY + void type(file_type __ft) noexcept { __ft_ = __ft; } + + _LIBCPP_INLINE_VISIBILITY + void permissions(perms __p) noexcept { __prms_ = __p; } + +private: + file_type __ft_; + perms __prms_; +}; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILE_STATUS_H diff --git a/libcxx/include/__filesystem/file_time_type.h b/libcxx/include/__filesystem/file_time_type.h new file mode 100644 index 000000000000..590146a06600 --- /dev/null +++ b/libcxx/include/__filesystem/file_time_type.h @@ -0,0 +1,27 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H +#define _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H + +#include <__availability> +#include <__config> +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +typedef chrono::time_point<_FilesystemClock> file_time_type; + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H diff --git a/libcxx/include/__filesystem/file_type.h b/libcxx/include/__filesystem/file_type.h new file mode 100644 index 000000000000..93bee86ad635 --- /dev/null +++ b/libcxx/include/__filesystem/file_type.h @@ -0,0 +1,39 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILE_TYPE_H +#define _LIBCPP___FILESYSTEM_FILE_TYPE_H + +#include <__availability> +#include <__config> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +// On Windows, the library never identifies files as block, character, fifo +// or socket. +enum class _LIBCPP_ENUM_VIS file_type : signed char { + none = 0, + not_found = -1, + regular = 1, + directory = 2, + symlink = 3, + block = 4, + character = 5, + fifo = 6, + socket = 7, + unknown = 8 +}; + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILE_TYPE_H diff --git a/libcxx/include/__filesystem/filesystem_error.h b/libcxx/include/__filesystem/filesystem_error.h new file mode 100644 index 000000000000..a8c6977b48e9 --- /dev/null +++ b/libcxx/include/__filesystem/filesystem_error.h @@ -0,0 +1,99 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H +#define _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include <__memory/shared_ptr.h> +#include +#include +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +class _LIBCPP_AVAILABILITY_FILESYSTEM _LIBCPP_EXCEPTION_ABI filesystem_error : public system_error { +public: + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(path(), path())) { + __create_what(0); + } + + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, const path& __p1, error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(__p1, path())) { + __create_what(1); + } + + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, const path& __p1, const path& __p2, + error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(__p1, __p2)) { + __create_what(2); + } + + _LIBCPP_INLINE_VISIBILITY + const path& path1() const noexcept { return __storage_->__p1_; } + + _LIBCPP_INLINE_VISIBILITY + const path& path2() const noexcept { return __storage_->__p2_; } + + filesystem_error(const filesystem_error&) = default; + ~filesystem_error() override; // key function + + _LIBCPP_INLINE_VISIBILITY + const char* what() const noexcept override { + return __storage_->__what_.c_str(); + } + + void __create_what(int __num_paths); + +private: + struct _LIBCPP_HIDDEN _Storage { + _LIBCPP_INLINE_VISIBILITY + _Storage(const path& __p1, const path& __p2) : __p1_(__p1), __p2_(__p2) {} + + path __p1_; + path __p2_; + string __what_; + }; + shared_ptr<_Storage> __storage_; +}; + +// TODO(ldionne): We need to pop the pragma and push it again after +// filesystem_error to work around PR41078. +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +template +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY +#ifndef _LIBCPP_NO_EXCEPTIONS +void __throw_filesystem_error(_Args&&... 
+  throw filesystem_error(_VSTD::forward<_Args>(__args)...);
+}
+#else
+void __throw_filesystem_error(_Args&&...) {
+  _VSTD::abort();
+}
+#endif
+_LIBCPP_AVAILABILITY_FILESYSTEM_POP
+
+_LIBCPP_END_NAMESPACE_FILESYSTEM
+
+#endif // _LIBCPP_CXX03_LANG
+
+#endif // _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H
diff --git a/libcxx/include/__filesystem/operations.h b/libcxx/include/__filesystem/operations.h
new file mode 100644
index 000000000000..19d6c2d437f9
--- /dev/null
+++ b/libcxx/include/__filesystem/operations.h
@@ -0,0 +1,599 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FILESYSTEM_OPERATIONS_H
+#define _LIBCPP___FILESYSTEM_OPERATIONS_H
+
+#include <__availability>
+#include <__config>
+#include <__filesystem/copy_options.h>
+#include <__filesystem/file_status.h>
+#include <__filesystem/file_time_type.h>
+#include <__filesystem/file_type.h>
+#include <__filesystem/path.h>
+#include <__filesystem/perm_options.h>
+#include <__filesystem/perms.h>
+#include <__filesystem/space_info.h>
+#include <chrono>
+#include <cstdint>
+#include <system_error>
+
+#ifndef _LIBCPP_CXX03_LANG
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH
+
+_LIBCPP_FUNC_VIS
+path __absolute(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+path __canonical(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+bool __copy_file(const path& __from, const path& __to, copy_options __opt,
+                 error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __copy_symlink(const path& __existing_symlink, const path& __new_symlink,
+                    error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __copy(const path& __from, const path& __to, copy_options __opt,
+            error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+bool __create_directories(const path& p, error_code* ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __create_directory_symlink(const path& __to, const path& __new_symlink,
+                                error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+bool __create_directory(const path& p, error_code* ec = nullptr);
+_LIBCPP_FUNC_VIS
+bool __create_directory(const path& p, const path& attributes,
+                        error_code* ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __create_hard_link(const path& __to, const path& __new_hard_link,
+                        error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __create_symlink(const path& __to, const path& __new_symlink,
+                      error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+path __current_path(error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __current_path(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+bool __equivalent(const path&, const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+file_status __status(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+uintmax_t __file_size(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+uintmax_t __hard_link_count(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+file_status __symlink_status(const path&, error_code* __ec = nullptr);
+_LIBCPP_FUNC_VIS
+file_time_type __last_write_time(const path& p, error_code* ec = nullptr);
+_LIBCPP_FUNC_VIS
+void __last_write_time(const path& p, file_time_type new_time,
+                       error_code* ec = nullptr);
+_LIBCPP_FUNC_VIS
+path __weakly_canonical(path const& __p, error_code*
__ec = nullptr); +_LIBCPP_FUNC_VIS +path __read_symlink(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __remove_all(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +bool __remove(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __rename(const path& from, const path& to, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __resize_file(const path& p, uintmax_t size, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +path __temp_directory_path(error_code* __ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p) { + return __absolute(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p, + error_code& __ec) { + return __absolute(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p) { + return __canonical(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p, + error_code& __ec) { + return __canonical(__p, &__ec); +} + + +inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, + const path& __to) { + return __copy_file(__from, __to, copy_options::none); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +copy_file(const path& __from, const path& __to, error_code& __ec) { + return __copy_file(__from, __to, copy_options::none, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +copy_file(const path& __from, const path& __to, copy_options __opt) { + return __copy_file(__from, __to, __opt); +} + +inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, + const path& __to, + copy_options __opt, + error_code& __ec) { + return __copy_file(__from, __to, __opt, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy_symlink(const path& __existing, + const path& __new) { + __copy_symlink(__existing, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +copy_symlink(const path& __ext, const path& __new, error_code& __ec) noexcept { + __copy_symlink(__ext, __new, &__ec); +} + + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, + const path& __to) { + __copy(__from, __to, copy_options::none); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + error_code& __ec) { + __copy(__from, __to, copy_options::none, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + copy_options __opt) { + __copy(__from, __to, __opt); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + copy_options __opt, + error_code& __ec) { + __copy(__from, __to, __opt, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p) { + return __create_directories(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p, + error_code& __ec) { + return __create_directories(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_directory_symlink(const path& __to, const path& __new) { + __create_directory_symlink(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_directory_symlink(const path& __to, const path& __new, + error_code& __ec) noexcept { + __create_directory_symlink(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p) { + return __create_directory(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +create_directory(const path& __p, error_code& __ec) noexcept { + return __create_directory(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p, + const path& __attrs) { + return 
__create_directory(__p, __attrs); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +create_directory(const path& __p, const path& __attrs, + error_code& __ec) noexcept { + return __create_directory(__p, __attrs, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void create_hard_link(const path& __to, + const path& __new) { + __create_hard_link(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_hard_link(const path& __to, const path& __new, + error_code& __ec) noexcept { + __create_hard_link(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void create_symlink(const path& __to, + const path& __new) { + __create_symlink(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_symlink(const path& __to, const path& __new, error_code& __ec) noexcept { + return __create_symlink(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path current_path() { + return __current_path(); +} + +inline _LIBCPP_INLINE_VISIBILITY path current_path(error_code& __ec) { + return __current_path(&__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p) { + __current_path(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p, + error_code& __ec) noexcept { + __current_path(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool equivalent(const path& __p1, + const path& __p2) { + return __equivalent(__p1, __p2); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { + return __equivalent(__p1, __p2, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool status_known(file_status __s) noexcept { + return __s.type() != file_type::none; +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(file_status __s) noexcept { + return status_known(__s) && __s.type() != file_type::not_found; +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p) { + return exists(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p, + error_code& __ec) noexcept { + auto __s = __status(__p, &__ec); + if (status_known(__s)) + __ec.clear(); + return exists(__s); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t file_size(const path& __p) { + return __file_size(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t +file_size(const path& __p, error_code& __ec) noexcept { + return __file_size(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t hard_link_count(const path& __p) { + return __hard_link_count(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t +hard_link_count(const path& __p, error_code& __ec) noexcept { + return __hard_link_count(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(file_status __s) noexcept { + return __s.type() == file_type::block; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p) { + return is_block_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p, + error_code& __ec) noexcept { + return is_block_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_character_file(file_status __s) noexcept { + return __s.type() == file_type::character; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_character_file(const path& __p) { + return is_character_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_character_file(const path& __p, error_code& __ec) noexcept { + return is_character_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(file_status __s) noexcept { + return 
__s.type() == file_type::directory; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p) { + return is_directory(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p, + error_code& __ec) noexcept { + return is_directory(__status(__p, &__ec)); +} + +_LIBCPP_FUNC_VIS +bool __fs_is_empty(const path& p, error_code* ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p) { + return __fs_is_empty(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p, + error_code& __ec) { + return __fs_is_empty(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(file_status __s) noexcept { + return __s.type() == file_type::fifo; +} +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p) { + return is_fifo(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p, + error_code& __ec) noexcept { + return is_fifo(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_regular_file(file_status __s) noexcept { + return __s.type() == file_type::regular; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_regular_file(const path& __p) { + return is_regular_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_regular_file(const path& __p, error_code& __ec) noexcept { + return is_regular_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(file_status __s) noexcept { + return __s.type() == file_type::symlink; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p) { + return is_symlink(__symlink_status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p, + error_code& __ec) noexcept { + return is_symlink(__symlink_status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(file_status __s) noexcept { + return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && + !is_symlink(__s); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p) { + return is_other(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p, + error_code& __ec) noexcept { + return is_other(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(file_status __s) noexcept { + return __s.type() == file_type::socket; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p) { + return is_socket(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p, + error_code& __ec) noexcept { + return is_socket(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY file_time_type +last_write_time(const path& __p) { + return __last_write_time(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_time_type +last_write_time(const path& __p, error_code& __ec) noexcept { + return __last_write_time(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void last_write_time(const path& __p, + file_time_type __t) { + __last_write_time(__p, __t); +} + +inline _LIBCPP_INLINE_VISIBILITY void +last_write_time(const path& __p, file_time_type __t, + error_code& __ec) noexcept { + __last_write_time(__p, __t, &__ec); +} + +_LIBCPP_FUNC_VIS +void __permissions(const path&, perms, perm_options, error_code* = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY void +permissions(const path& __p, perms __prms, + perm_options __opts = perm_options::replace) { + __permissions(__p, __prms, __opts); +} + +inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, + error_code& __ec) noexcept { + 
__permissions(__p, __prms, perm_options::replace, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, + perm_options __opts, + error_code& __ec) { + __permissions(__p, __prms, __opts, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, + const path& __base, + error_code& __ec) { + path __tmp = __weakly_canonical(__p, &__ec); + if (__ec) + return {}; + path __tmp_base = __weakly_canonical(__base, &__ec); + if (__ec) + return {}; + return __tmp.lexically_proximate(__tmp_base); +} + +inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, + error_code& __ec) { + return proximate(__p, current_path(), __ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path +proximate(const path& __p, const path& __base = current_path()) { + return __weakly_canonical(__p).lexically_proximate( + __weakly_canonical(__base)); +} + +inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p) { + return __read_symlink(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p, + error_code& __ec) { + return __read_symlink(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, + const path& __base, + error_code& __ec) { + path __tmp = __weakly_canonical(__p, &__ec); + if (__ec) + return path(); + path __tmpbase = __weakly_canonical(__base, &__ec); + if (__ec) + return path(); + return __tmp.lexically_relative(__tmpbase); +} + +inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, + error_code& __ec) { + return relative(__p, current_path(), __ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path +relative(const path& __p, const path& __base = current_path()) { + return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p) { + return __remove_all(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p, + error_code& __ec) { + return __remove_all(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p) { + return __remove(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p, + error_code& __ec) noexcept { + return __remove(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void rename(const path& __from, + const path& __to) { + return __rename(__from, __to); +} + +inline _LIBCPP_INLINE_VISIBILITY void +rename(const path& __from, const path& __to, error_code& __ec) noexcept { + return __rename(__from, __to, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void resize_file(const path& __p, + uintmax_t __ns) { + return __resize_file(__p, __ns); +} + +inline _LIBCPP_INLINE_VISIBILITY void +resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { + return __resize_file(__p, __ns, &__ec); +} + +_LIBCPP_FUNC_VIS +space_info __space(const path&, error_code* __ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p) { + return __space(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p, + error_code& __ec) noexcept { + return __space(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p) { + return __status(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p, + error_code& __ec) noexcept { + return __status(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status symlink_status(const path& __p) { + return __symlink_status(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status +symlink_status(const 
path& __p, error_code& __ec) noexcept { + return __symlink_status(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path() { + return __temp_directory_path(); +} + +inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path(error_code& __ec) { + return __temp_directory_path(&__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p) { + return __weakly_canonical(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p, + error_code& __ec) { + return __weakly_canonical(__p, &__ec); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_OPERATIONS_H diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h new file mode 100644 index 000000000000..a6d1ee997d91 --- /dev/null +++ b/libcxx/include/__filesystem/path.h @@ -0,0 +1,1018 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_PATH_H +#define _LIBCPP___FILESYSTEM_PATH_H + +#include <__availability> +#include <__config> +#include +#include +#include <__iterator/back_insert_iterator.h> +#include <__iterator/iterator_traits.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +# include // for quoted +#endif + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +template +struct __can_convert_char { + static const bool value = false; +}; +template +struct __can_convert_char : public __can_convert_char<_Tp> {}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = wchar_t; +}; +#ifndef _LIBCPP_HAS_NO_CHAR8_T +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char8_t; +}; +#endif +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char16_t; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char32_t; +}; + +template +typename enable_if<__can_convert_char<_ECharT>::value, bool>::type +__is_separator(_ECharT __e) { +#if defined(_LIBCPP_WIN32API) + return __e == _ECharT('/') || __e == _ECharT('\\'); +#else + return __e == _ECharT('/'); +#endif +} + +#ifndef _LIBCPP_HAS_NO_CHAR8_T +typedef u8string __u8_string; +#else +typedef string __u8_string; +#endif + +struct _NullSentinel {}; + +template +using _Void = void; + +template +struct __is_pathable_string : public false_type {}; + +template +struct __is_pathable_string< + basic_string<_ECharT, _Traits, _Alloc>, + _Void::__char_type> > + : public __can_convert_char<_ECharT> { + using _Str = basic_string<_ECharT, _Traits, _Alloc>; + using _Base = __can_convert_char<_ECharT>; + static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + static _ECharT const* __range_end(_Str const& __s) { + return __s.data() + __s.length(); + } + static _ECharT __first_or_null(_Str const& __s) { + return __s.empty() ? 
_ECharT{} : __s[0]; + } +}; + +template +struct __is_pathable_string< + basic_string_view<_ECharT, _Traits>, + _Void::__char_type> > + : public __can_convert_char<_ECharT> { + using _Str = basic_string_view<_ECharT, _Traits>; + using _Base = __can_convert_char<_ECharT>; + static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + static _ECharT const* __range_end(_Str const& __s) { + return __s.data() + __s.length(); + } + static _ECharT __first_or_null(_Str const& __s) { + return __s.empty() ? _ECharT{} : __s[0]; + } +}; + +template ::type, + class _UnqualPtrType = + typename remove_const::type>::type, + bool _IsCharPtr = is_pointer<_DS>::value&& + __can_convert_char<_UnqualPtrType>::value> +struct __is_pathable_char_array : false_type {}; + +template +struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> + : __can_convert_char::type> { + using _Base = __can_convert_char::type>; + + static _ECharT const* __range_begin(const _ECharT* __b) { return __b; } + static _ECharT const* __range_end(const _ECharT* __b) { + using _Iter = const _ECharT*; + const _ECharT __sentinel = _ECharT{}; + _Iter __e = __b; + for (; *__e != __sentinel; ++__e) + ; + return __e; + } + + static _ECharT __first_or_null(const _ECharT* __b) { return *__b; } +}; + +template ::value, + class = void> +struct __is_pathable_iter : false_type {}; + +template +struct __is_pathable_iter< + _Iter, true, + _Void::value_type>::__char_type> > + : __can_convert_char::value_type> { + using _ECharT = typename iterator_traits<_Iter>::value_type; + using _Base = __can_convert_char<_ECharT>; + + static _Iter __range_begin(_Iter __b) { return __b; } + static _NullSentinel __range_end(_Iter) { return _NullSentinel{}; } + + static _ECharT __first_or_null(_Iter __b) { return *__b; } +}; + +template ::value, + bool _IsCharIterT = __is_pathable_char_array<_Tp>::value, + bool _IsIterT = !_IsCharIterT && __is_pathable_iter<_Tp>::value> +struct __is_pathable : false_type { + static_assert(!_IsStringT && !_IsCharIterT && !_IsIterT, "Must all be false"); +}; + +template +struct __is_pathable<_Tp, true, false, false> : __is_pathable_string<_Tp> {}; + +template +struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> { +}; + +template +struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {}; + +#if defined(_LIBCPP_WIN32API) +typedef wstring __path_string; +typedef wchar_t __path_value; +#else +typedef string __path_string; +typedef char __path_value; +#endif + +#if defined(_LIBCPP_WIN32API) +_LIBCPP_FUNC_VIS +size_t __wide_to_char(const wstring&, char*, size_t); +_LIBCPP_FUNC_VIS +size_t __char_to_wide(const string&, wchar_t*, size_t); +#endif + +template +struct _PathCVT; + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +template +struct _PathCVT { + static_assert(__can_convert_char<_ECharT>::value, + "Char type not convertible"); + + typedef __narrow_to_utf8 _Narrower; +#if defined(_LIBCPP_WIN32API) + typedef __widen_from_utf8 _Widener; +#endif + + static void __append_range(__path_string& __dest, _ECharT const* __b, + _ECharT const* __e) { +#if defined(_LIBCPP_WIN32API) + string __utf8; + _Narrower()(back_inserter(__utf8), __b, __e); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); +#else + _Narrower()(back_inserter(__dest), __b, __e); +#endif + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); + if (__b == __e) + return; + 
basic_string<_ECharT> __tmp(__b, __e); +#if defined(_LIBCPP_WIN32API) + string __utf8; + _Narrower()(back_inserter(__utf8), __tmp.data(), + __tmp.data() + __tmp.length()); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); +#else + _Narrower()(back_inserter(__dest), __tmp.data(), + __tmp.data() + __tmp.length()); +#endif + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { + static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); + const _ECharT __sentinel = _ECharT{}; + if (*__b == __sentinel) + return; + basic_string<_ECharT> __tmp; + for (; *__b != __sentinel; ++__b) + __tmp.push_back(*__b); +#if defined(_LIBCPP_WIN32API) + string __utf8; + _Narrower()(back_inserter(__utf8), __tmp.data(), + __tmp.data() + __tmp.length()); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); +#else + _Narrower()(back_inserter(__dest), __tmp.data(), + __tmp.data() + __tmp.length()); +#endif + } + + template + static void __append_source(__path_string& __dest, _Source const& __s) { + using _Traits = __is_pathable<_Source>; + __append_range(__dest, _Traits::__range_begin(__s), + _Traits::__range_end(__s)); + } +}; +#endif // !_LIBCPP_HAS_NO_LOCALIZATION + +template <> +struct _PathCVT<__path_value> { + + template + static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + for (; __b != __e; ++__b) + __dest.push_back(*__b); + } + + template + static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + __dest.append(__b, __e); + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { + const char __sentinel = char{}; + for (; *__b != __sentinel; ++__b) + __dest.push_back(*__b); + } + + template + static void __append_source(__path_string& __dest, _Source const& __s) { + using _Traits = __is_pathable<_Source>; + __append_range(__dest, _Traits::__range_begin(__s), + _Traits::__range_end(__s)); + } +}; + +#if defined(_LIBCPP_WIN32API) +template <> +struct _PathCVT { + + static void + __append_string(__path_string& __dest, const basic_string &__str) { + size_t __size = __char_to_wide(__str, nullptr, 0); + size_t __pos = __dest.size(); + __dest.resize(__pos + __size); + __char_to_wide(__str, const_cast<__path_value*>(__dest.data()) + __pos, __size); + } + + template + static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + basic_string __tmp(__b, __e); + __append_string(__dest, __tmp); + } + + template + static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + basic_string __tmp(__b, __e); + __append_string(__dest, __tmp); + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { + const char __sentinel = char{}; + basic_string __tmp; + for (; *__b != __sentinel; ++__b) + __tmp.push_back(*__b); + __append_string(__dest, __tmp); + } + + template + static void __append_source(__path_string& __dest, _Source const& __s) { + using _Traits = __is_pathable<_Source>; + __append_range(__dest, _Traits::__range_begin(__s), + _Traits::__range_end(__s)); + } +}; + +template +struct _PathExport { + typedef __narrow_to_utf8 _Narrower; + typedef __widen_from_utf8 _Widener; + + template + static 
void __append(_Str& __dest, const __path_string& __src) { + string __utf8; + _Narrower()(back_inserter(__utf8), __src.data(), __src.data() + __src.size()); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); + } +}; + +template <> +struct _PathExport { + template + static void __append(_Str& __dest, const __path_string& __src) { + size_t __size = __wide_to_char(__src, nullptr, 0); + size_t __pos = __dest.size(); + __dest.resize(__size); + __wide_to_char(__src, const_cast(__dest.data()) + __pos, __size); + } +}; + +template <> +struct _PathExport { + template + static void __append(_Str& __dest, const __path_string& __src) { + __dest.append(__src.begin(), __src.end()); + } +}; + +template <> +struct _PathExport { + template + static void __append(_Str& __dest, const __path_string& __src) { + __dest.append(__src.begin(), __src.end()); + } +}; + +#ifndef _LIBCPP_HAS_NO_CHAR8_T +template <> +struct _PathExport { + typedef __narrow_to_utf8 _Narrower; + + template + static void __append(_Str& __dest, const __path_string& __src) { + _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); + } +}; +#endif /* !_LIBCPP_HAS_NO_CHAR8_T */ +#endif /* _LIBCPP_WIN32API */ + +class _LIBCPP_TYPE_VIS path { + template + using _EnableIfPathable = + typename enable_if<__is_pathable<_SourceOrIter>::value, _Tp>::type; + + template + using _SourceChar = typename __is_pathable<_Tp>::__char_type; + + template + using _SourceCVT = _PathCVT<_SourceChar<_Tp> >; + +public: +#if defined(_LIBCPP_WIN32API) + typedef wchar_t value_type; + static constexpr value_type preferred_separator = L'\\'; +#else + typedef char value_type; + static constexpr value_type preferred_separator = '/'; +#endif + typedef basic_string string_type; + typedef basic_string_view __string_view; + + enum _LIBCPP_ENUM_VIS format : unsigned char { + auto_format, + native_format, + generic_format + }; + + // constructors and destructor + _LIBCPP_INLINE_VISIBILITY path() noexcept {} + _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {} + _LIBCPP_INLINE_VISIBILITY path(path&& __p) noexcept + : __pn_(_VSTD::move(__p.__pn_)) {} + + _LIBCPP_INLINE_VISIBILITY + path(string_type&& __s, format = format::auto_format) noexcept + : __pn_(_VSTD::move(__s)) {} + + template > + path(const _Source& __src, format = format::auto_format) { + _SourceCVT<_Source>::__append_source(__pn_, __src); + } + + template + path(_InputIt __first, _InputIt __last, format = format::auto_format) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); + } + +/* +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + // TODO Implement locale conversions. 
+ template > + path(const _Source& __src, const locale& __loc, format = format::auto_format); + template + path(_InputIt __first, _InputIt _last, const locale& __loc, + format = format::auto_format); +#endif +*/ + + _LIBCPP_INLINE_VISIBILITY + ~path() = default; + + // assignments + _LIBCPP_INLINE_VISIBILITY + path& operator=(const path& __p) { + __pn_ = __p.__pn_; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator=(path&& __p) noexcept { + __pn_ = _VSTD::move(__p.__pn_); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator=(string_type&& __s) noexcept { + __pn_ = _VSTD::move(__s); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& assign(string_type&& __s) noexcept { + __pn_ = _VSTD::move(__s); + return *this; + } + + template + _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + operator=(const _Source& __src) { + return this->assign(__src); + } + + template + _EnableIfPathable<_Source> assign(const _Source& __src) { + __pn_.clear(); + _SourceCVT<_Source>::__append_source(__pn_, __src); + return *this; + } + + template + path& assign(_InputIt __first, _InputIt __last) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + __pn_.clear(); + _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); + return *this; + } + +public: + // appends +#if defined(_LIBCPP_WIN32API) + path& operator/=(const path& __p) { + auto __p_root_name = __p.__root_name(); + auto __p_root_name_size = __p_root_name.size(); + if (__p.is_absolute() || + (!__p_root_name.empty() && __p_root_name != __string_view(root_name().__pn_))) { + __pn_ = __p.__pn_; + return *this; + } + if (__p.has_root_directory()) { + path __root_name_str = root_name(); + __pn_ = __root_name_str.native(); + __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); + return *this; + } + if (has_filename() || (!has_root_directory() && is_absolute())) + __pn_ += preferred_separator; + __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); + return *this; + } + template + _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + operator/=(const _Source& __src) { + return operator/=(path(__src)); + } + + template + _EnableIfPathable<_Source> append(const _Source& __src) { + return operator/=(path(__src)); + } + + template + path& append(_InputIt __first, _InputIt __last) { + return operator/=(path(__first, __last)); + } +#else + path& operator/=(const path& __p) { + if (__p.is_absolute()) { + __pn_ = __p.__pn_; + return *this; + } + if (has_filename()) + __pn_ += preferred_separator; + __pn_ += __p.native(); + return *this; + } + + // FIXME: Use _LIBCPP_DIAGNOSE_WARNING to produce a diagnostic when __src + // is known at compile time to be "/' since the user almost certainly intended + // to append a separator instead of overwriting the path with "/" + template + _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + operator/=(const _Source& __src) { + return this->append(__src); + } + + template + _EnableIfPathable<_Source> append(const _Source& __src) { + using _Traits = __is_pathable<_Source>; + using _CVT = _PathCVT<_SourceChar<_Source> >; + bool __source_is_absolute = __is_separator(_Traits::__first_or_null(__src)); + if (__source_is_absolute) + __pn_.clear(); + else if (has_filename()) + __pn_ += preferred_separator; + _CVT::__append_source(__pn_, __src); + return *this; + } + + template + path& append(_InputIt __first, _InputIt __last) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + static_assert(__can_convert_char<_ItVal>::value, "Must convertible"); + 
using _CVT = _PathCVT<_ItVal>; + if (__first != __last && __is_separator(*__first)) + __pn_.clear(); + else if (has_filename()) + __pn_ += preferred_separator; + _CVT::__append_range(__pn_, __first, __last); + return *this; + } +#endif + + // concatenation + _LIBCPP_INLINE_VISIBILITY + path& operator+=(const path& __x) { + __pn_ += __x.__pn_; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(const string_type& __x) { + __pn_ += __x; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(__string_view __x) { + __pn_ += __x; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(const value_type* __x) { + __pn_ += __x; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(value_type __x) { + __pn_ += __x; + return *this; + } + + template + typename enable_if<__can_convert_char<_ECharT>::value, path&>::type + operator+=(_ECharT __x) { + _PathCVT<_ECharT>::__append_source(__pn_, + basic_string_view<_ECharT>(&__x, 1)); + return *this; + } + + template + _EnableIfPathable<_Source> operator+=(const _Source& __x) { + return this->concat(__x); + } + + template + _EnableIfPathable<_Source> concat(const _Source& __x) { + _SourceCVT<_Source>::__append_source(__pn_, __x); + return *this; + } + + template + path& concat(_InputIt __first, _InputIt __last) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); + return *this; + } + + // modifiers + _LIBCPP_INLINE_VISIBILITY + void clear() noexcept { __pn_.clear(); } + + path& make_preferred() { +#if defined(_LIBCPP_WIN32API) + _VSTD::replace(__pn_.begin(), __pn_.end(), L'/', L'\\'); +#endif + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& remove_filename() { + auto __fname = __filename(); + if (!__fname.empty()) + __pn_.erase(__fname.data() - __pn_.data()); + return *this; + } + + path& replace_filename(const path& __replacement) { + remove_filename(); + return (*this /= __replacement); + } + + path& replace_extension(const path& __replacement = path()); + + _LIBCPP_INLINE_VISIBILITY + void swap(path& __rhs) noexcept { __pn_.swap(__rhs.__pn_); } + + // private helper to allow reserving memory in the path + _LIBCPP_INLINE_VISIBILITY + void __reserve(size_t __s) { __pn_.reserve(__s); } + + // native format observers + _LIBCPP_INLINE_VISIBILITY + const string_type& native() const noexcept { return __pn_; } + + _LIBCPP_INLINE_VISIBILITY + const value_type* c_str() const noexcept { return __pn_.c_str(); } + + _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; } + +#if defined(_LIBCPP_WIN32API) + _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { return __pn_; } + + _VSTD::wstring generic_wstring() const { + _VSTD::wstring __s; + __s.resize(__pn_.size()); + _VSTD::replace_copy(__pn_.begin(), __pn_.end(), __s.begin(), '\\', '/'); + return __s; + } + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + string(const _Allocator& __a = _Allocator()) const { + using _Str = basic_string<_ECharT, _Traits, _Allocator>; + _Str __s(__a); + __s.reserve(__pn_.size()); + _PathExport<_ECharT>::__append(__s, __pn_); + return __s; + } + + _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { + return string(); + } + _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const { + using _CVT = __narrow_to_utf8; + __u8_string __s; + __s.reserve(__pn_.size()); + _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + 
__pn_.size()); + return __s; + } + + _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { + return string(); + } + _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { + return string(); + } + + // generic format observers + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + generic_string(const _Allocator& __a = _Allocator()) const { + using _Str = basic_string<_ECharT, _Traits, _Allocator>; + _Str __s = string<_ECharT, _Traits, _Allocator>(__a); + // Note: This (and generic_u8string below) is slightly suboptimal as + // it iterates twice over the string; once to convert it to the right + // character type, and once to replace path delimiters. + _VSTD::replace(__s.begin(), __s.end(), + static_cast<_ECharT>('\\'), static_cast<_ECharT>('/')); + return __s; + } + + _VSTD::string generic_string() const { return generic_string(); } + _VSTD::u16string generic_u16string() const { return generic_string(); } + _VSTD::u32string generic_u32string() const { return generic_string(); } + __u8_string generic_u8string() const { + __u8_string __s = u8string(); + _VSTD::replace(__s.begin(), __s.end(), '\\', '/'); + return __s; + } +#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +#else /* _LIBCPP_WIN32API */ + + _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { return __pn_; } +#ifndef _LIBCPP_HAS_NO_CHAR8_T + _LIBCPP_INLINE_VISIBILITY _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } +#else + _LIBCPP_INLINE_VISIBILITY _VSTD::string u8string() const { return __pn_; } +#endif + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + string(const _Allocator& __a = _Allocator()) const { + using _CVT = __widen_from_utf8; + using _Str = basic_string<_ECharT, _Traits, _Allocator>; + _Str __s(__a); + __s.reserve(__pn_.size()); + _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); + return __s; + } + +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { + return string(); + } +#endif + _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { + return string(); + } + _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { + return string(); + } +#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ + + // generic format observers + _VSTD::string generic_string() const { return __pn_; } +#ifndef _LIBCPP_HAS_NO_CHAR8_T + _VSTD::u8string generic_u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } +#else + _VSTD::string generic_u8string() const { return __pn_; } +#endif + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + generic_string(const _Allocator& __a = _Allocator()) const { + return string<_ECharT, _Traits, _Allocator>(__a); + } + +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + _VSTD::wstring generic_wstring() const { return string(); } +#endif + _VSTD::u16string generic_u16string() const { return string(); } + _VSTD::u32string generic_u32string() const { return string(); } +#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +#endif /* !_LIBCPP_WIN32API */ + +private: + int __compare(__string_view) const; + __string_view __root_name() const; + __string_view __root_directory() const; + __string_view __root_path_raw() const; + __string_view __relative_path() const; + __string_view __parent_path() const; + __string_view __filename() const; + __string_view 
__stem() const; + __string_view __extension() const; + +public: + // compare + _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const noexcept { + return __compare(__p.__pn_); + } + _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const { + return __compare(__s); + } + _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const { + return __compare(__s); + } + _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const { + return __compare(__s); + } + + // decomposition + _LIBCPP_INLINE_VISIBILITY path root_name() const { + return string_type(__root_name()); + } + _LIBCPP_INLINE_VISIBILITY path root_directory() const { + return string_type(__root_directory()); + } + _LIBCPP_INLINE_VISIBILITY path root_path() const { +#if defined(_LIBCPP_WIN32API) + return string_type(__root_path_raw()); +#else + return root_name().append(string_type(__root_directory())); +#endif + } + _LIBCPP_INLINE_VISIBILITY path relative_path() const { + return string_type(__relative_path()); + } + _LIBCPP_INLINE_VISIBILITY path parent_path() const { + return string_type(__parent_path()); + } + _LIBCPP_INLINE_VISIBILITY path filename() const { + return string_type(__filename()); + } + _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem()); } + _LIBCPP_INLINE_VISIBILITY path extension() const { + return string_type(__extension()); + } + + // query + _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool + empty() const noexcept { + return __pn_.empty(); + } + + _LIBCPP_INLINE_VISIBILITY bool has_root_name() const { + return !__root_name().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const { + return !__root_directory().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_root_path() const { + return !__root_path_raw().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const { + return !__relative_path().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_parent_path() const { + return !__parent_path().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_filename() const { + return !__filename().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); } + _LIBCPP_INLINE_VISIBILITY bool has_extension() const { + return !__extension().empty(); + } + + _LIBCPP_INLINE_VISIBILITY bool is_absolute() const { +#if defined(_LIBCPP_WIN32API) + __string_view __root_name_str = __root_name(); + __string_view __root_dir = __root_directory(); + if (__root_name_str.size() == 2 && __root_name_str[1] == ':') { + // A drive letter with no root directory is relative, e.g. x:example. + return !__root_dir.empty(); + } + // If no root name, it's relative, e.g. 
\example is relative to the current drive + if (__root_name_str.empty()) + return false; + if (__root_name_str.size() < 3) + return false; + // A server root name, like \\server, is always absolute + if (__root_name_str[0] != '/' && __root_name_str[0] != '\\') + return false; + if (__root_name_str[1] != '/' && __root_name_str[1] != '\\') + return false; + // Seems to be a server root name + return true; +#else + return has_root_directory(); +#endif + } + _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); } + + // relative paths + path lexically_normal() const; + path lexically_relative(const path& __base) const; + + _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const { + path __result = this->lexically_relative(__base); + if (__result.native().empty()) + return *this; + return __result; + } + + // iterators + class _LIBCPP_TYPE_VIS iterator; + typedef iterator const_iterator; + + iterator begin() const; + iterator end() const; + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template + _LIBCPP_INLINE_VISIBILITY friend + typename enable_if::value && + is_same<_Traits, char_traits >::value, + basic_ostream<_CharT, _Traits>&>::type + operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { + __os << _VSTD::__quoted(__p.native()); + return __os; + } + + template + _LIBCPP_INLINE_VISIBILITY friend + typename enable_if::value || + !is_same<_Traits, char_traits >::value, + basic_ostream<_CharT, _Traits>&>::type + operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { + __os << _VSTD::__quoted(__p.string<_CharT, _Traits>()); + return __os; + } + + template + _LIBCPP_INLINE_VISIBILITY friend basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) { + basic_string<_CharT, _Traits> __tmp; + __is >> __quoted(__tmp); + __p = __tmp; + return __is; + } +#endif // !_LIBCPP_HAS_NO_LOCALIZATION + + friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) == 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) != 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) < 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) <= 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) > 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) >= 0; + } + + friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs, + const path& __rhs) { + path __result(__lhs); + __result /= __rhs; + return __result; + } +private: + inline _LIBCPP_INLINE_VISIBILITY path& + __assign_view(__string_view const& __s) noexcept { + __pn_ = string_type(__s); + return *this; + } + string_type __pn_; +}; + +inline _LIBCPP_INLINE_VISIBILITY void swap(path& __lhs, path& __rhs) noexcept { + __lhs.swap(__rhs); +} + +_LIBCPP_FUNC_VIS +size_t hash_value(const path& __p) noexcept; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_PATH_H diff --git a/libcxx/include/__filesystem/path_iterator.h 
b/libcxx/include/__filesystem/path_iterator.h new file mode 100644 index 000000000000..62f8dc6fd357 --- /dev/null +++ b/libcxx/include/__filesystem/path_iterator.h @@ -0,0 +1,132 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_PATH_ITERATOR_H +#define _LIBCPP___FILESYSTEM_PATH_ITERATOR_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include <__iterator/iterator_traits.h> +#include <__debug> +#include +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class _LIBCPP_TYPE_VIS path::iterator { +public: + enum _ParserState : unsigned char { + _Singular, + _BeforeBegin, + _InRootName, + _InRootDir, + _InFilenames, + _InTrailingSep, + _AtEnd + }; + +public: + typedef bidirectional_iterator_tag iterator_category; + + typedef path value_type; + typedef ptrdiff_t difference_type; + typedef const path* pointer; + typedef const path& reference; + + typedef void + __stashing_iterator_tag; // See reverse_iterator and __is_stashing_iterator + +public: + _LIBCPP_INLINE_VISIBILITY + iterator() + : __stashed_elem_(), __path_ptr_(nullptr), __entry_(), + __state_(_Singular) {} + + iterator(const iterator&) = default; + ~iterator() = default; + + iterator& operator=(const iterator&) = default; + + _LIBCPP_INLINE_VISIBILITY + reference operator*() const { return __stashed_elem_; } + + _LIBCPP_INLINE_VISIBILITY + pointer operator->() const { return &__stashed_elem_; } + + _LIBCPP_INLINE_VISIBILITY + iterator& operator++() { + _LIBCPP_ASSERT(__state_ != _Singular, + "attempting to increment a singular iterator"); + _LIBCPP_ASSERT(__state_ != _AtEnd, + "attempting to increment the end iterator"); + return __increment(); + } + + _LIBCPP_INLINE_VISIBILITY + iterator operator++(int) { + iterator __it(*this); + this->operator++(); + return __it; + } + + _LIBCPP_INLINE_VISIBILITY + iterator& operator--() { + _LIBCPP_ASSERT(__state_ != _Singular, + "attempting to decrement a singular iterator"); + _LIBCPP_ASSERT(__entry_.data() != __path_ptr_->native().data(), + "attempting to decrement the begin iterator"); + return __decrement(); + } + + _LIBCPP_INLINE_VISIBILITY + iterator operator--(int) { + iterator __it(*this); + this->operator--(); + return __it; + } + +private: + friend class path; + + inline _LIBCPP_INLINE_VISIBILITY friend bool operator==(const iterator&, + const iterator&); + + iterator& __increment(); + iterator& __decrement(); + + path __stashed_elem_; + const path* __path_ptr_; + path::__string_view __entry_; + _ParserState __state_; +}; + +inline _LIBCPP_INLINE_VISIBILITY bool operator==(const path::iterator& __lhs, + const path::iterator& __rhs) { + return __lhs.__path_ptr_ == __rhs.__path_ptr_ && + __lhs.__entry_.data() == __rhs.__entry_.data(); +} + +inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const path::iterator& __lhs, + const path::iterator& __rhs) { + return !(__lhs == __rhs); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_PATH_ITERATOR_H diff --git a/libcxx/include/__filesystem/perm_options.h 
b/libcxx/include/__filesystem/perm_options.h
new file mode 100644
index 000000000000..62cd8f575650
--- /dev/null
+++ b/libcxx/include/__filesystem/perm_options.h
@@ -0,0 +1,73 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FILESYSTEM_PERM_OPTIONS_H
+#define _LIBCPP___FILESYSTEM_PERM_OPTIONS_H
+
+#include <__availability>
+#include <__config>
+
+#ifndef _LIBCPP_CXX03_LANG
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH
+
+enum class _LIBCPP_ENUM_VIS perm_options : unsigned char {
+  replace = 1,
+  add = 2,
+  remove = 4,
+  nofollow = 8
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator&(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned>(_LHS) &
+                                   static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator|(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned>(_LHS) |
+                                   static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator^(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned>(_LHS) ^
+                                   static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator~(perm_options _LHS) {
+  return static_cast<perm_options>(~static_cast<unsigned>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS & _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS | _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS ^ _RHS;
+}
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_POP
+
+_LIBCPP_END_NAMESPACE_FILESYSTEM
+
+#endif // _LIBCPP_CXX03_LANG
+
+#endif // _LIBCPP___FILESYSTEM_PERM_OPTIONS_H
diff --git a/libcxx/include/__filesystem/perms.h b/libcxx/include/__filesystem/perms.h
new file mode 100644
index 000000000000..832f8b07e55c
--- /dev/null
+++ b/libcxx/include/__filesystem/perms.h
@@ -0,0 +1,91 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FILESYSTEM_PERMS_H
+#define _LIBCPP___FILESYSTEM_PERMS_H
+
+#include <__availability>
+#include <__config>
+
+#ifndef _LIBCPP_CXX03_LANG
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH
+
+// On Windows, these permission bits map to one single readonly flag per
+// file, and the executable bit is always returned as set. When setting
+// permissions, as long as the write bit is set for either owner, group or
+// others, the readonly flag is cleared.
+enum class _LIBCPP_ENUM_VIS perms : unsigned {
+  none = 0,
+
+  owner_read = 0400,
+  owner_write = 0200,
+  owner_exec = 0100,
+  owner_all = 0700,
+
+  group_read = 040,
+  group_write = 020,
+  group_exec = 010,
+  group_all = 070,
+
+  others_read = 04,
+  others_write = 02,
+  others_exec = 01,
+  others_all = 07,
+
+  all = 0777,
+
+  set_uid = 04000,
+  set_gid = 02000,
+  sticky_bit = 01000,
+  mask = 07777,
+  unknown = 0xFFFF,
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator&(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) &
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator|(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) |
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator^(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) ^
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator~(perms _LHS) {
+  return static_cast<perms>(~static_cast<unsigned>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator&=(perms& _LHS, perms _RHS) { return _LHS = _LHS & _RHS; }
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator|=(perms& _LHS, perms _RHS) { return _LHS = _LHS | _RHS; }
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator^=(perms& _LHS, perms _RHS) { return _LHS = _LHS ^ _RHS; }
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_POP
+
+_LIBCPP_END_NAMESPACE_FILESYSTEM
+
+#endif // _LIBCPP_CXX03_LANG
+
+#endif // _LIBCPP___FILESYSTEM_PERMS_H
diff --git a/libcxx/include/__filesystem/recursive_directory_iterator.h b/libcxx/include/__filesystem/recursive_directory_iterator.h
new file mode 100644
index 000000000000..db7e793e8530
--- /dev/null
+++ b/libcxx/include/__filesystem/recursive_directory_iterator.h
@@ -0,0 +1,181 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H +#define _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H + +#include <__availability> +#include <__config> +#include <__filesystem/directory_entry.h> +#include <__filesystem/directory_options.h> +#include <__filesystem/path.h> +#include <__iterator/iterator_traits.h> +#include <__memory/shared_ptr.h> +#include <__ranges/enable_borrowed_range.h> +#include <__ranges/enable_view.h> +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class recursive_directory_iterator { +public: + using value_type = directory_entry; + using difference_type = ptrdiff_t; + using pointer = directory_entry const*; + using reference = directory_entry const&; + using iterator_category = input_iterator_tag; + +public: + // constructors and destructor + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator() noexcept : __rec_(false) {} + + _LIBCPP_INLINE_VISIBILITY + explicit recursive_directory_iterator( + const path& __p, directory_options __xoptions = directory_options::none) + : recursive_directory_iterator(__p, __xoptions, nullptr) {} + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator(const path& __p, directory_options __xoptions, + error_code& __ec) + : recursive_directory_iterator(__p, __xoptions, &__ec) {} + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator(const path& __p, error_code& __ec) + : recursive_directory_iterator(__p, directory_options::none, &__ec) {} + + recursive_directory_iterator(const recursive_directory_iterator&) = default; + recursive_directory_iterator(recursive_directory_iterator&&) = default; + + recursive_directory_iterator& + operator=(const recursive_directory_iterator&) = default; + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator& + operator=(recursive_directory_iterator&& __o) noexcept { + // non-default implementation provided to support self-move assign. 
+ if (this != &__o) { + __imp_ = _VSTD::move(__o.__imp_); + __rec_ = __o.__rec_; + } + return *this; + } + + ~recursive_directory_iterator() = default; + + _LIBCPP_INLINE_VISIBILITY + const directory_entry& operator*() const { return __dereference(); } + + _LIBCPP_INLINE_VISIBILITY + const directory_entry* operator->() const { return &__dereference(); } + + recursive_directory_iterator& operator++() { return __increment(); } + + _LIBCPP_INLINE_VISIBILITY + __dir_element_proxy operator++(int) { + __dir_element_proxy __p(**this); + __increment(); + return __p; + } + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator& increment(error_code& __ec) { + return __increment(&__ec); + } + + _LIBCPP_FUNC_VIS directory_options options() const; + _LIBCPP_FUNC_VIS int depth() const; + + _LIBCPP_INLINE_VISIBILITY + void pop() { __pop(); } + + _LIBCPP_INLINE_VISIBILITY + void pop(error_code& __ec) { __pop(&__ec); } + + _LIBCPP_INLINE_VISIBILITY + bool recursion_pending() const { return __rec_; } + + _LIBCPP_INLINE_VISIBILITY + void disable_recursion_pending() { __rec_ = false; } + +private: + _LIBCPP_FUNC_VIS + recursive_directory_iterator(const path& __p, directory_options __opt, + error_code* __ec); + + _LIBCPP_FUNC_VIS + const directory_entry& __dereference() const; + + _LIBCPP_FUNC_VIS + bool __try_recursion(error_code* __ec); + + _LIBCPP_FUNC_VIS + void __advance(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + recursive_directory_iterator& __increment(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + void __pop(error_code* __ec = nullptr); + + inline _LIBCPP_INLINE_VISIBILITY friend bool + operator==(const recursive_directory_iterator&, + const recursive_directory_iterator&) noexcept; + + struct _LIBCPP_HIDDEN __shared_imp; + shared_ptr<__shared_imp> __imp_; + bool __rec_; +}; // class recursive_directory_iterator + +inline _LIBCPP_INLINE_VISIBILITY bool +operator==(const recursive_directory_iterator& __lhs, + const recursive_directory_iterator& __rhs) noexcept { + return __lhs.__imp_ == __rhs.__imp_; +} + +_LIBCPP_INLINE_VISIBILITY +inline bool operator!=(const recursive_directory_iterator& __lhs, + const recursive_directory_iterator& __rhs) noexcept { + return !(__lhs == __rhs); +} +// enable recursive_directory_iterator range-based for statements +inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator +begin(recursive_directory_iterator __iter) noexcept { + return __iter; +} + +inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator +end(recursive_directory_iterator) noexcept { + return recursive_directory_iterator(); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#if !defined(_LIBCPP_HAS_NO_RANGES) + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true; + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true; + +#endif + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H diff --git a/libcxx/include/__filesystem/space_info.h b/libcxx/include/__filesystem/space_info.h new file mode 100644 index 000000000000..098f085678e4 --- /dev/null +++ b/libcxx/include/__filesystem/space_info.h @@ -0,0 +1,35 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
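[Editor's note, not part of the patch] The free begin()/end() overloads at the end of the recursive_directory_iterator.h hunk above are what enable range-based for directly on the iterator object, while depth(), increment(error_code&) and disable_recursion_pending() control the traversal. A usage sketch:

#include <cstddef>
#include <filesystem>
#include <iostream>
#include <string>
#include <system_error>

namespace fs = std::filesystem;

// Print the tree under `root`, indenting by recursion depth and skipping
// descent into directories named ".git".
void list_tree(const fs::path& root) {
  std::error_code ec;
  for (auto it = fs::recursive_directory_iterator(root, ec);
       !ec && it != fs::recursive_directory_iterator(); it.increment(ec)) {
    std::cout << std::string(static_cast<std::size_t>(it.depth()) * 2, ' ')
              << it->path().filename().string() << '\n';
    if (it->is_directory(ec) && it->path().filename() == ".git")
      it.disable_recursion_pending();
  }
}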
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_SPACE_INFO_H +#define _LIBCPP___FILESYSTEM_SPACE_INFO_H + +#include <__availability> +#include <__config> +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +struct _LIBCPP_TYPE_VIS space_info { + uintmax_t capacity; + uintmax_t free; + uintmax_t available; +}; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_SPACE_INFO_H diff --git a/libcxx/include/__filesystem/u8path.h b/libcxx/include/__filesystem/u8path.h new file mode 100644 index 000000000000..dca3b0c5028b --- /dev/null +++ b/libcxx/include/__filesystem/u8path.h @@ -0,0 +1,96 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_U8PATH_H +#define _LIBCPP___FILESYSTEM_U8PATH_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T + typename enable_if<__is_pathable<_InputIt>::value, path>::type + u8path(_InputIt __f, _InputIt __l) { + static_assert( +#ifndef _LIBCPP_HAS_NO_CHAR8_T + is_same::__char_type, char8_t>::value || +#endif + is_same::__char_type, char>::value, + "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" + " or 'char8_t'"); +#if defined(_LIBCPP_WIN32API) + string __tmp(__f, __l); + using _CVT = __widen_from_utf8; + _VSTD::wstring __w; + __w.reserve(__tmp.size()); + _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); + return path(__w); +#else + return path(__f, __l); +#endif /* !_LIBCPP_WIN32API */ +} + +#if defined(_LIBCPP_WIN32API) +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T + typename enable_if<__is_pathable<_InputIt>::value, path>::type + u8path(_InputIt __f, _NullSentinel) { + static_assert( +#ifndef _LIBCPP_HAS_NO_CHAR8_T + is_same::__char_type, char8_t>::value || +#endif + is_same::__char_type, char>::value, + "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" + " or 'char8_t'"); + string __tmp; + const char __sentinel = char{}; + for (; *__f != __sentinel; ++__f) + __tmp.push_back(*__f); + using _CVT = __widen_from_utf8; + _VSTD::wstring __w; + __w.reserve(__tmp.size()); + _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); + return path(__w); +} +#endif /* _LIBCPP_WIN32API */ + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T + typename enable_if<__is_pathable<_Source>::value, path>::type + u8path(const _Source& __s) { + static_assert( +#ifndef _LIBCPP_HAS_NO_CHAR8_T + is_same::__char_type, char8_t>::value || +#endif + is_same::__char_type, char>::value, + "u8path(Source const&) requires Source have a character type of type " + "'char' or 'char8_t'"); +#if defined(_LIBCPP_WIN32API) + using _Traits = __is_pathable<_Source>; + 
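[Editor's note, not part of the patch] A short sketch of the u8path overloads being moved into __filesystem/u8path.h above: the input is always interpreted as UTF-8, which matters on Windows where path::value_type is wchar_t. (As the _LIBCPP_DEPRECATED_WITH_CHAR8_T marker indicates, these overloads are deprecated once char8_t is available.)

#include <filesystem>
#include <string>

namespace fs = std::filesystem;

// Convert a UTF-8 encoded string to a path in the native encoding.
fs::path from_utf8(const std::string& utf8_name) {
  return fs::u8path(utf8_name);        // Source overload
}

// Iterator-pair overload; the value type must be char (or char8_t).
fs::path from_utf8_range(const char* first, const char* last) {
  return fs::u8path(first, last);
}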
return u8path(_VSTD::__unwrap_iter(_Traits::__range_begin(__s)), _VSTD::__unwrap_iter(_Traits::__range_end(__s))); +#else + return path(__s); +#endif +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_U8PATH_H diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index a9a8c1f0da03..59429c13d415 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -37,21 +37,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace __format { -/** The type stored in @ref basic_format_arg. */ +/// The type stored in @ref basic_format_arg. +/// +/// @note The 128-bit types are unconditionally in the list to avoid the values +/// of the enums to depend on the availability of 128-bit integers. enum class _LIBCPP_ENUM_VIS __arg_t : uint8_t { __none, __boolean, __char_type, __int, __long_long, -#ifndef _LIBCPP_HAS_NO_INT128 __i128, -#endif __unsigned, __unsigned_long_long, -#ifndef _LIBCPP_HAS_NO_INT128 __u128, -#endif __float, __double, __long_double, @@ -75,18 +74,22 @@ visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__int); case __format::__arg_t::__long_long: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__long_long); -#ifndef _LIBCPP_HAS_NO_INT128 case __format::__arg_t::__i128: +#ifndef _LIBCPP_HAS_NO_INT128 return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__i128); +#else + _LIBCPP_UNREACHABLE(); #endif case __format::__arg_t::__unsigned: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__unsigned); case __format::__arg_t::__unsigned_long_long: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__unsigned_long_long); -#ifndef _LIBCPP_HAS_NO_INT128 case __format::__arg_t::__u128: +#ifndef _LIBCPP_HAS_NO_INT128 return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__u128); +#else + _LIBCPP_UNREACHABLE(); #endif case __format::__arg_t::__float: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__float); diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index b4fe5cc7b12c..f8ec7c8eb001 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -14,10 +14,9 @@ #include <__config> #include <__format/format_args.h> #include <__format/format_fwd.h> +#include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include -#include -#include #ifndef _LIBCPP_HAS_NO_LOCALIZATION #include diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index 2be36a1ba947..75a81f5184a0 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -16,7 +16,6 @@ #include <__format/format_string.h> #include <__format/formatter.h> #include <__format/parser_std_format_spec.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index 531acdf0a5b2..d7a666743afb 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -28,8 +28,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off - // [iterator.concept.readable] template concept __indirectly_readable_impl = @@ -259,8 +257,6 @@ concept indirectly_movable_storable = // Note: 
indirectly_swappable is located in iter_swap.h to prevent a dependency cycle // (both iter_swap and indirectly_swappable require indirectly_readable). -// clang-format on - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index b0b89c0eeea3..91dc3055c86d 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -14,8 +14,7 @@ #include <__iterator/readable_traits.h> #include <__ranges/enable_borrowed_range.h> #include <__utility/as_const.h> -#include <__utility/decay_copy.h> -#include <__utility/forward.h> +#include <__utility/auto_cast.h> #include #include @@ -27,24 +26,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off - namespace ranges { template concept __can_borrow = - is_lvalue_reference_v<_Tp> || enable_borrowed_range >; - - template - concept __is_complete = requires { sizeof(_Tp); }; + is_lvalue_reference_v<_Tp> || enable_borrowed_range>; } // namespace ranges // [range.access.begin] -namespace ranges::__begin { + +namespace ranges { +namespace __begin { template concept __member_begin = __can_borrow<_Tp> && requires(_Tp&& __t) { - { _VSTD::__decay_copy(__t.begin()) } -> input_or_output_iterator; + { _LIBCPP_AUTO_CAST(__t.begin()) } -> input_or_output_iterator; }; void begin(auto&) = delete; @@ -54,61 +50,61 @@ namespace ranges::__begin { concept __unqualified_begin = !__member_begin<_Tp> && __can_borrow<_Tp> && - __class_or_enum > && + __class_or_enum> && requires(_Tp && __t) { - { _VSTD::__decay_copy(begin(__t)) } -> input_or_output_iterator; + { _LIBCPP_AUTO_CAST(begin(__t)) } -> input_or_output_iterator; }; struct __fn { template - requires is_array_v> - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const noexcept { - constexpr bool __complete = __is_complete >; - if constexpr (__complete) { // used to disable cryptic diagnostic - return __t + 0; - } - else { - static_assert(__complete, "`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."); - } + requires is_array_v> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const noexcept + { + return __t; } template - requires __member_begin<_Tp> + requires __member_begin<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(__t.begin()))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.begin()))) { - return __t.begin(); + return _LIBCPP_AUTO_CAST(__t.begin()); } template - requires __unqualified_begin<_Tp> + requires __unqualified_begin<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(begin(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(begin(__t)))) { - return begin(__t); + return _LIBCPP_AUTO_CAST(begin(__t)); } void operator()(auto&&) const = delete; }; -} // namespace ranges::__begin +} -namespace ranges { - inline namespace __cpo { - inline constexpr auto begin = __begin::__fn{}; - } // namespace __cpo +inline namespace __cpo { + inline constexpr auto begin = __begin::__fn{}; +} // namespace __cpo +} // namespace ranges + +// [range.range] +namespace ranges { template using iterator_t = decltype(ranges::begin(declval<_Tp&>())); } // namespace ranges // [range.access.end] -namespace ranges::__end { + +namespace ranges { +namespace __end { template concept __member_end = __can_borrow<_Tp> && requires(_Tp&& __t) { typename iterator_t<_Tp>; - { 
_VSTD::__decay_copy(_VSTD::forward<_Tp>(__t).end()) } -> sentinel_for >; + { _LIBCPP_AUTO_CAST(__t.end()) } -> sentinel_for>; }; void end(auto&) = delete; @@ -118,98 +114,101 @@ namespace ranges::__end { concept __unqualified_end = !__member_end<_Tp> && __can_borrow<_Tp> && - __class_or_enum > && + __class_or_enum> && requires(_Tp && __t) { typename iterator_t<_Tp>; - { _VSTD::__decay_copy(end(_VSTD::forward<_Tp>(__t))) } -> sentinel_for >; + { _LIBCPP_AUTO_CAST(end(__t)) } -> sentinel_for>; }; class __fn { public: template - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[_Np]) const noexcept { - constexpr bool __complete = __is_complete >; - if constexpr (__complete) { // used to disable cryptic diagnostic - return __t + _Np; - } - else { - static_assert(__complete, "`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."); - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[_Np]) const noexcept + requires (sizeof(*__t) != 0) // Disallow incomplete element types. + { + return __t + _Np; } template - requires __member_end<_Tp> + requires __member_end<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(__t.end()))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.end()))) { - return _VSTD::forward<_Tp>(__t).end(); + return _LIBCPP_AUTO_CAST(__t.end()); } template - requires __unqualified_end<_Tp> + requires __unqualified_end<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(end(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(end(__t)))) { - return end(__t); + return _LIBCPP_AUTO_CAST(end(__t)); } void operator()(auto&&) const = delete; }; -} // namespace ranges::__end +} -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto end = __end::__fn{}; -} // namespace ranges::__cpo +} // namespace __cpo +} // namespace ranges -namespace ranges::__cbegin { +// [range.access.cbegin] + +namespace ranges { +namespace __cbegin { struct __fn { template - requires invocable + requires invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const - noexcept(noexcept(ranges::begin(_VSTD::as_const(__t)))) + noexcept(noexcept(ranges::begin(_VSTD::as_const(__t)))) { return ranges::begin(_VSTD::as_const(__t)); } template - requires is_rvalue_reference_v<_Tp> && invocable + requires is_rvalue_reference_v<_Tp> && invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::begin(static_cast<_Tp const&&>(__t)))) + noexcept(noexcept(ranges::begin(static_cast<_Tp const&&>(__t)))) { return ranges::begin(static_cast<_Tp const&&>(__t)); } }; -} // namespace ranges::__cbegin +} -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto cbegin = __cbegin::__fn{}; -} // namespace ranges::__cpo +} // namespace __cpo +} // namespace ranges + +// [range.access.cend] -namespace ranges::__cend { +namespace ranges { +namespace __cend { struct __fn { template - requires invocable + requires invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const - noexcept(noexcept(ranges::end(_VSTD::as_const(__t)))) + noexcept(noexcept(ranges::end(_VSTD::as_const(__t)))) { return ranges::end(_VSTD::as_const(__t)); } template - requires is_rvalue_reference_v<_Tp> && invocable + requires is_rvalue_reference_v<_Tp> && invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto 
operator()(_Tp&& __t) const - noexcept(noexcept(ranges::end(static_cast<_Tp const&&>(__t)))) + noexcept(noexcept(ranges::end(static_cast<_Tp const&&>(__t)))) { return ranges::end(static_cast<_Tp const&&>(__t)); } }; -} // namespace ranges::__cend +} -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto cend = __cend::__fn{}; -} // namespace ranges::__cpo - -// clang-format off +} // namespace __cpo +} // namespace ranges #endif // !defined(_LIBCPP_HAS_NO_RANGES) diff --git a/libcxx/include/__ranges/all.h b/libcxx/include/__ranges/all.h index affe13ee0862..ccc77258ba10 100644 --- a/libcxx/include/__ranges/all.h +++ b/libcxx/include/__ranges/all.h @@ -17,7 +17,7 @@ #include <__ranges/range_adaptor.h> #include <__ranges/ref_view.h> #include <__ranges/subrange.h> -#include <__utility/decay_copy.h> +#include <__utility/auto_cast.h> #include <__utility/declval.h> #include <__utility/forward.h> #include @@ -38,9 +38,9 @@ namespace __all { requires ranges::view> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(_VSTD::forward<_Tp>(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(_VSTD::forward<_Tp>(__t)))) { - return _VSTD::forward<_Tp>(__t); + return _LIBCPP_AUTO_CAST(_VSTD::forward<_Tp>(__t)); } template diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h index 6a8364006beb..bad23c8c4bfb 100644 --- a/libcxx/include/__ranges/concepts.h +++ b/libcxx/include/__ranges/concepts.h @@ -29,8 +29,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD -// clang-format off - #if !defined(_LIBCPP_HAS_NO_RANGES) namespace ranges { @@ -126,8 +124,6 @@ namespace ranges { #endif // !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format on - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___RANGES_CONCEPTS_H diff --git a/libcxx/include/__ranges/data.h b/libcxx/include/__ranges/data.h index 7eade494cceb..cc151c59f3d7 100644 --- a/libcxx/include/__ranges/data.h +++ b/libcxx/include/__ranges/data.h @@ -26,9 +26,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off -namespace ranges { // [range.prim.data] + +namespace ranges { namespace __data { template concept __ptr_to_object = is_pointer_v<_Tp> && is_object_v>; @@ -65,15 +65,13 @@ namespace __data { return _VSTD::to_address(ranges::begin(_VSTD::forward<_Tp>(__t))); } }; -} // end namespace __data +} inline namespace __cpo { inline constexpr auto data = __data::__fn{}; } // namespace __cpo } // namespace ranges -// clang-format off - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h index fc6a938fd86e..e8a8aabf4aed 100644 --- a/libcxx/include/__ranges/empty.h +++ b/libcxx/include/__ranges/empty.h @@ -13,7 +13,6 @@ #include <__iterator/concepts.h> #include <__ranges/access.h> #include <__ranges/size.h> -#include <__utility/forward.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -24,19 +23,19 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off -namespace ranges { // [range.prim.empty] + +namespace ranges { namespace __empty { template concept __member_empty = requires(_Tp&& __t) { - bool(_VSTD::forward<_Tp>(__t).empty()); + bool(__t.empty()); }; template concept __can_invoke_size = !__member_empty<_Tp> && - requires(_Tp&& __t) { ranges::size(_VSTD::forward<_Tp>(__t)); }; + requires(_Tp&& __t) { ranges::size(__t); }; template concept __can_compare_begin_end = @@ -51,13 +50,13 @@ namespace 
__empty { template <__member_empty _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Tp&& __t) const noexcept(noexcept(bool(__t.empty()))) { - return __t.empty(); + return bool(__t.empty()); } template <__can_invoke_size _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Tp&& __t) const - noexcept(noexcept(ranges::size(_VSTD::forward<_Tp>(__t)))) { - return ranges::size(_VSTD::forward<_Tp>(__t)) == 0; + noexcept(noexcept(ranges::size(__t))) { + return ranges::size(__t) == 0; } template<__can_compare_begin_end _Tp> @@ -72,7 +71,6 @@ inline namespace __cpo { inline constexpr auto empty = __empty::__fn{}; } // namespace __cpo } // namespace ranges -// clang-format off #endif // !defined(_LIBCPP_HAS_NO_RANGES) diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index af0a8479f2ec..fc6641cf4887 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -13,8 +13,7 @@ #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__ranges/access.h> -#include <__utility/decay_copy.h> -#include <__utility/forward.h> +#include <__utility/auto_cast.h> #include #include @@ -26,12 +25,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off namespace ranges { -template -inline constexpr bool disable_sized_range = false; + template + inline constexpr bool disable_sized_range = false; +} // [range.prim.size] + +namespace ranges { namespace __size { void size(auto&) = delete; void size(const auto&) = delete; @@ -41,7 +42,7 @@ namespace __size { template concept __member_size = __size_enabled<_Tp> && requires(_Tp&& __t) { - { _VSTD::__decay_copy(_VSTD::forward<_Tp>(__t).size()) } -> __integer_like; + { _LIBCPP_AUTO_CAST(__t.size()) } -> __integer_like; }; template @@ -50,7 +51,7 @@ namespace __size { !__member_size<_Tp> && __class_or_enum> && requires(_Tp&& __t) { - { _VSTD::__decay_copy(size(_VSTD::forward<_Tp>(__t))) } -> __integer_like; + { _LIBCPP_AUTO_CAST(size(__t)) } -> __integer_like; }; template @@ -76,14 +77,14 @@ namespace __size { template <__member_size _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __integer_like auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::forward<_Tp>(__t).size())) { - return _VSTD::forward<_Tp>(__t).size(); + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.size()))) { + return _LIBCPP_AUTO_CAST(__t.size()); } template <__unqualified_size _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __integer_like auto operator()(_Tp&& __t) const - noexcept(noexcept(size(_VSTD::forward<_Tp>(__t)))) { - return size(_VSTD::forward<_Tp>(__t)); + noexcept(noexcept(_LIBCPP_AUTO_CAST(size(__t)))) { + return _LIBCPP_AUTO_CAST(size(__t)); } template<__difference _Tp> @@ -92,18 +93,23 @@ namespace __size { return _VSTD::__to_unsigned_like(ranges::end(__t) - ranges::begin(__t)); } }; -} // end namespace __size +} inline namespace __cpo { inline constexpr auto size = __size::__fn{}; } // namespace __cpo +} // namespace ranges + +// [range.prim.ssize] +namespace ranges { namespace __ssize { struct __fn { template requires requires (_Tp&& __t) { ranges::size(__t); } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr integral auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::size(__t))) { + noexcept(noexcept(ranges::size(__t))) + { using _Signed = make_signed_t; if constexpr (sizeof(ptrdiff_t) > sizeof(_Signed)) return static_cast(ranges::size(__t)); @@ -118,8 +124,6 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -// 
clang-format off - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__utility/decay_copy.h b/libcxx/include/__utility/auto_cast.h similarity index 53% rename from libcxx/include/__utility/decay_copy.h rename to libcxx/include/__utility/auto_cast.h index 32238e117f56..5c368e077508 100644 --- a/libcxx/include/__utility/decay_copy.h +++ b/libcxx/include/__utility/auto_cast.h @@ -7,29 +7,16 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___UTILITY_DECAY_COPY_H -#define _LIBCPP___UTILITY_DECAY_COPY_H +#ifndef _LIBCPP___UTILITY_AUTO_CAST_H +#define _LIBCPP___UTILITY_AUTO_CAST_H #include <__config> -#include <__utility/forward.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif -_LIBCPP_BEGIN_NAMESPACE_STD +#define _LIBCPP_AUTO_CAST(expr) static_cast::type>(expr) -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR -typename decay<_Tp>::type __decay_copy(_Tp&& __t) -#if _LIBCPP_STD_VER > 17 - noexcept(is_nothrow_convertible_v<_Tp, decay_t<_Tp>>) -#endif -{ - return _VSTD::forward<_Tp>(__t); -} - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___UTILITY_DECAY_COPY_H +#endif // _LIBCPP___UTILITY_AUTO_CAST_H diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index 39e8ca2e814b..9f5b42747b34 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -238,31 +238,26 @@ inline constexpr bool std::ranges::enable_view #include <__config> -#include <__debug> -#include <__ranges/enable_borrowed_range.h> -#include <__ranges/enable_view.h> -#include <__utility/forward.h> -#include +#include <__filesystem/copy_options.h> +#include <__filesystem/directory_entry.h> +#include <__filesystem/directory_iterator.h> +#include <__filesystem/directory_options.h> +#include <__filesystem/file_status.h> +#include <__filesystem/file_time_type.h> +#include <__filesystem/file_type.h> +#include <__filesystem/filesystem_error.h> +#include <__filesystem/operations.h> +#include <__filesystem/path_iterator.h> +#include <__filesystem/path.h> +#include <__filesystem/perm_options.h> +#include <__filesystem/perms.h> +#include <__filesystem/recursive_directory_iterator.h> +#include <__filesystem/space_info.h> +#include <__filesystem/u8path.h> #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -# include // for quoted -#endif - #if defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) # error "The Filesystem library is not supported since libc++ has been configured with LIBCXX_ENABLE_FILESYSTEM disabled" #endif @@ -271,2775 +266,4 @@ inline constexpr bool std::ranges::enable_view - -#ifndef _LIBCPP_CXX03_LANG - -_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM - -_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH - -typedef chrono::time_point<_FilesystemClock> file_time_type; - -struct _LIBCPP_TYPE_VIS space_info { - uintmax_t capacity; - uintmax_t free; - uintmax_t available; -}; - -// On Windows, the library never identifies files as block, character, fifo -// or socket. -enum class _LIBCPP_ENUM_VIS file_type : signed char { - none = 0, - not_found = -1, - regular = 1, - directory = 2, - symlink = 3, - block = 4, - character = 5, - fifo = 6, - socket = 7, - unknown = 8 -}; - -// On Windows, these permission bits map to one single readonly flag per -// file, and the executable bit is always returned as set. 
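[Editor's note, not part of the patch] The decay_copy.h to auto_cast.h rename earlier in this hunk replaces the __decay_copy helper function with the _LIBCPP_AUTO_CAST macro. A rough illustration of the semantics; the AUTO_CAST name below is local to the example and simply mimics the pattern static_cast<decay_t<decltype(expr)>>(expr):

#include <type_traits>

#define AUTO_CAST(expr) static_cast<std::decay_t<decltype(expr)>>(expr)

int main() {
  int arr[3] = {1, 2, 3};
  auto p = AUTO_CAST(arr);   // int*: array-to-pointer decay, like a decay-copy
  const int c = 42;
  auto v = AUTO_CAST(c);     // int: top-level const is dropped
  static_assert(std::is_same_v<decltype(p), int*>, "");
  static_assert(std::is_same_v<decltype(v), int>, "");
  (void)p; (void)v;
}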
When setting -// permissions, as long as the write bit is set for either owner, group or -// others, the readonly flag is cleared. -enum class _LIBCPP_ENUM_VIS perms : unsigned { - none = 0, - - owner_read = 0400, - owner_write = 0200, - owner_exec = 0100, - owner_all = 0700, - - group_read = 040, - group_write = 020, - group_exec = 010, - group_all = 070, - - others_read = 04, - others_write = 02, - others_exec = 01, - others_all = 07, - - all = 0777, - - set_uid = 04000, - set_gid = 02000, - sticky_bit = 01000, - mask = 07777, - unknown = 0xFFFF, -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator&(perms _LHS, perms _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator|(perms _LHS, perms _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator^(perms _LHS, perms _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator~(perms _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator&=(perms& _LHS, perms _RHS) { return _LHS = _LHS & _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator|=(perms& _LHS, perms _RHS) { return _LHS = _LHS | _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator^=(perms& _LHS, perms _RHS) { return _LHS = _LHS ^ _RHS; } - -enum class _LIBCPP_ENUM_VIS perm_options : unsigned char { - replace = 1, - add = 2, - remove = 4, - nofollow = 8 -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator&(perm_options _LHS, perm_options _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator|(perm_options _LHS, perm_options _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator^(perm_options _LHS, perm_options _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator~(perm_options _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS & _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS | _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS ^ _RHS; -} - -enum class _LIBCPP_ENUM_VIS copy_options : unsigned short { - none = 0, - skip_existing = 1, - overwrite_existing = 2, - update_existing = 4, - recursive = 8, - copy_symlinks = 16, - skip_symlinks = 32, - directories_only = 64, - create_symlinks = 128, - create_hard_links = 256, - __in_recursive_copy = 512, -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator&(copy_options _LHS, copy_options _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator|(copy_options _LHS, copy_options _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator^(copy_options _LHS, copy_options _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline 
constexpr copy_options operator~(copy_options _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator&=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS & _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS | _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS ^ _RHS; -} - -enum class _LIBCPP_ENUM_VIS directory_options : unsigned char { - none = 0, - follow_directory_symlink = 1, - skip_permission_denied = 2 -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator&(directory_options _LHS, - directory_options _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator|(directory_options _LHS, - directory_options _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator^(directory_options _LHS, - directory_options _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator~(directory_options _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator&=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS & _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator|=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS | _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator^=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS ^ _RHS; -} - -class _LIBCPP_TYPE_VIS file_status { -public: - // constructors - _LIBCPP_INLINE_VISIBILITY - file_status() noexcept : file_status(file_type::none) {} - _LIBCPP_INLINE_VISIBILITY - explicit file_status(file_type __ft, perms __prms = perms::unknown) noexcept - : __ft_(__ft), - __prms_(__prms) {} - - file_status(const file_status&) noexcept = default; - file_status(file_status&&) noexcept = default; - - _LIBCPP_INLINE_VISIBILITY - ~file_status() {} - - file_status& operator=(const file_status&) noexcept = default; - file_status& operator=(file_status&&) noexcept = default; - - // observers - _LIBCPP_INLINE_VISIBILITY - file_type type() const noexcept { return __ft_; } - - _LIBCPP_INLINE_VISIBILITY - perms permissions() const noexcept { return __prms_; } - - // modifiers - _LIBCPP_INLINE_VISIBILITY - void type(file_type __ft) noexcept { __ft_ = __ft; } - - _LIBCPP_INLINE_VISIBILITY - void permissions(perms __p) noexcept { __prms_ = __p; } - -private: - file_type __ft_; - perms __prms_; -}; - -class _LIBCPP_TYPE_VIS directory_entry; - -template -struct __can_convert_char { - static const bool value = false; -}; -template -struct __can_convert_char : public __can_convert_char<_Tp> {}; -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char; -}; -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = wchar_t; -}; -#ifndef _LIBCPP_HAS_NO_CHAR8_T -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char8_t; -}; -#endif -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char16_t; -}; -template <> -struct __can_convert_char { 
- static const bool value = true; - using __char_type = char32_t; -}; - -template -typename enable_if<__can_convert_char<_ECharT>::value, bool>::type -__is_separator(_ECharT __e) { -#if defined(_LIBCPP_WIN32API) - return __e == _ECharT('/') || __e == _ECharT('\\'); -#else - return __e == _ECharT('/'); -#endif -} - -#ifndef _LIBCPP_HAS_NO_CHAR8_T -typedef u8string __u8_string; -#else -typedef string __u8_string; -#endif - -struct _NullSentinel {}; - -template -using _Void = void; - -template -struct __is_pathable_string : public false_type {}; - -template -struct __is_pathable_string< - basic_string<_ECharT, _Traits, _Alloc>, - _Void::__char_type> > - : public __can_convert_char<_ECharT> { - using _Str = basic_string<_ECharT, _Traits, _Alloc>; - using _Base = __can_convert_char<_ECharT>; - static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } - static _ECharT const* __range_end(_Str const& __s) { - return __s.data() + __s.length(); - } - static _ECharT __first_or_null(_Str const& __s) { - return __s.empty() ? _ECharT{} : __s[0]; - } -}; - -template -struct __is_pathable_string< - basic_string_view<_ECharT, _Traits>, - _Void::__char_type> > - : public __can_convert_char<_ECharT> { - using _Str = basic_string_view<_ECharT, _Traits>; - using _Base = __can_convert_char<_ECharT>; - static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } - static _ECharT const* __range_end(_Str const& __s) { - return __s.data() + __s.length(); - } - static _ECharT __first_or_null(_Str const& __s) { - return __s.empty() ? _ECharT{} : __s[0]; - } -}; - -template ::type, - class _UnqualPtrType = - typename remove_const::type>::type, - bool _IsCharPtr = is_pointer<_DS>::value&& - __can_convert_char<_UnqualPtrType>::value> -struct __is_pathable_char_array : false_type {}; - -template -struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> - : __can_convert_char::type> { - using _Base = __can_convert_char::type>; - - static _ECharT const* __range_begin(const _ECharT* __b) { return __b; } - static _ECharT const* __range_end(const _ECharT* __b) { - using _Iter = const _ECharT*; - const _ECharT __sentinel = _ECharT{}; - _Iter __e = __b; - for (; *__e != __sentinel; ++__e) - ; - return __e; - } - - static _ECharT __first_or_null(const _ECharT* __b) { return *__b; } -}; - -template ::value, - class = void> -struct __is_pathable_iter : false_type {}; - -template -struct __is_pathable_iter< - _Iter, true, - _Void::value_type>::__char_type> > - : __can_convert_char::value_type> { - using _ECharT = typename iterator_traits<_Iter>::value_type; - using _Base = __can_convert_char<_ECharT>; - - static _Iter __range_begin(_Iter __b) { return __b; } - static _NullSentinel __range_end(_Iter) { return _NullSentinel{}; } - - static _ECharT __first_or_null(_Iter __b) { return *__b; } -}; - -template ::value, - bool _IsCharIterT = __is_pathable_char_array<_Tp>::value, - bool _IsIterT = !_IsCharIterT && __is_pathable_iter<_Tp>::value> -struct __is_pathable : false_type { - static_assert(!_IsStringT && !_IsCharIterT && !_IsIterT, "Must all be false"); -}; - -template -struct __is_pathable<_Tp, true, false, false> : __is_pathable_string<_Tp> {}; - -template -struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> { -}; - -template -struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {}; - -#if defined(_LIBCPP_WIN32API) -typedef wstring __path_string; -typedef wchar_t __path_value; -#else -typedef string __path_string; -typedef char __path_value; 
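[Editor's note, not part of the patch] The __can_convert_char / __is_pathable traits and _PathCVT converters being deleted from <filesystem> above (this patch relocates them into the new __filesystem/ detail headers) are what let path accept many different source types. A sketch of the user-facing effect:

#include <filesystem>
#include <string>
#include <string_view>

namespace fs = std::filesystem;

void pathable_sources() {
  std::string s = "dir/file.txt";
  std::string_view sv = s;

  fs::path p1(s);                    // basic_string source
  fs::path p2(sv);                   // basic_string_view source
  fs::path p3(s.c_str());            // NUL-terminated character pointer
  fs::path p4(s.begin(), s.end());   // iterator range
  (void)p1; (void)p2; (void)p3; (void)p4;
}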
-#endif - -#if defined(_LIBCPP_WIN32API) -_LIBCPP_FUNC_VIS -size_t __wide_to_char(const wstring&, char*, size_t); -_LIBCPP_FUNC_VIS -size_t __char_to_wide(const string&, wchar_t*, size_t); -#endif - -template -struct _PathCVT; - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -template -struct _PathCVT { - static_assert(__can_convert_char<_ECharT>::value, - "Char type not convertible"); - - typedef __narrow_to_utf8 _Narrower; -#if defined(_LIBCPP_WIN32API) - typedef __widen_from_utf8 _Widener; -#endif - - static void __append_range(__path_string& __dest, _ECharT const* __b, - _ECharT const* __e) { -#if defined(_LIBCPP_WIN32API) - string __utf8; - _Narrower()(back_inserter(__utf8), __b, __e); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); -#else - _Narrower()(back_inserter(__dest), __b, __e); -#endif - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); - if (__b == __e) - return; - basic_string<_ECharT> __tmp(__b, __e); -#if defined(_LIBCPP_WIN32API) - string __utf8; - _Narrower()(back_inserter(__utf8), __tmp.data(), - __tmp.data() + __tmp.length()); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); -#else - _Narrower()(back_inserter(__dest), __tmp.data(), - __tmp.data() + __tmp.length()); -#endif - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { - static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); - const _ECharT __sentinel = _ECharT{}; - if (*__b == __sentinel) - return; - basic_string<_ECharT> __tmp; - for (; *__b != __sentinel; ++__b) - __tmp.push_back(*__b); -#if defined(_LIBCPP_WIN32API) - string __utf8; - _Narrower()(back_inserter(__utf8), __tmp.data(), - __tmp.data() + __tmp.length()); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); -#else - _Narrower()(back_inserter(__dest), __tmp.data(), - __tmp.data() + __tmp.length()); -#endif - } - - template - static void __append_source(__path_string& __dest, _Source const& __s) { - using _Traits = __is_pathable<_Source>; - __append_range(__dest, _Traits::__range_begin(__s), - _Traits::__range_end(__s)); - } -}; -#endif // !_LIBCPP_HAS_NO_LOCALIZATION - -template <> -struct _PathCVT<__path_value> { - - template - static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - for (; __b != __e; ++__b) - __dest.push_back(*__b); - } - - template - static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - __dest.append(__b, __e); - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { - const char __sentinel = char{}; - for (; *__b != __sentinel; ++__b) - __dest.push_back(*__b); - } - - template - static void __append_source(__path_string& __dest, _Source const& __s) { - using _Traits = __is_pathable<_Source>; - __append_range(__dest, _Traits::__range_begin(__s), - _Traits::__range_end(__s)); - } -}; - -#if defined(_LIBCPP_WIN32API) -template <> -struct _PathCVT { - - static void - __append_string(__path_string& __dest, const basic_string &__str) { - size_t __size = __char_to_wide(__str, nullptr, 0); - size_t __pos = __dest.size(); - __dest.resize(__pos + __size); - __char_to_wide(__str, const_cast<__path_value*>(__dest.data()) + __pos, __size); - } - - template - 
static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - basic_string __tmp(__b, __e); - __append_string(__dest, __tmp); - } - - template - static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - basic_string __tmp(__b, __e); - __append_string(__dest, __tmp); - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { - const char __sentinel = char{}; - basic_string __tmp; - for (; *__b != __sentinel; ++__b) - __tmp.push_back(*__b); - __append_string(__dest, __tmp); - } - - template - static void __append_source(__path_string& __dest, _Source const& __s) { - using _Traits = __is_pathable<_Source>; - __append_range(__dest, _Traits::__range_begin(__s), - _Traits::__range_end(__s)); - } -}; - -template -struct _PathExport { - typedef __narrow_to_utf8 _Narrower; - typedef __widen_from_utf8 _Widener; - - template - static void __append(_Str& __dest, const __path_string& __src) { - string __utf8; - _Narrower()(back_inserter(__utf8), __src.data(), __src.data() + __src.size()); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); - } -}; - -template <> -struct _PathExport { - template - static void __append(_Str& __dest, const __path_string& __src) { - size_t __size = __wide_to_char(__src, nullptr, 0); - size_t __pos = __dest.size(); - __dest.resize(__size); - __wide_to_char(__src, const_cast(__dest.data()) + __pos, __size); - } -}; - -template <> -struct _PathExport { - template - static void __append(_Str& __dest, const __path_string& __src) { - __dest.append(__src.begin(), __src.end()); - } -}; - -template <> -struct _PathExport { - template - static void __append(_Str& __dest, const __path_string& __src) { - __dest.append(__src.begin(), __src.end()); - } -}; - -#ifndef _LIBCPP_HAS_NO_CHAR8_T -template <> -struct _PathExport { - typedef __narrow_to_utf8 _Narrower; - - template - static void __append(_Str& __dest, const __path_string& __src) { - _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); - } -}; -#endif /* !_LIBCPP_HAS_NO_CHAR8_T */ -#endif /* _LIBCPP_WIN32API */ - -class _LIBCPP_TYPE_VIS path { - template - using _EnableIfPathable = - typename enable_if<__is_pathable<_SourceOrIter>::value, _Tp>::type; - - template - using _SourceChar = typename __is_pathable<_Tp>::__char_type; - - template - using _SourceCVT = _PathCVT<_SourceChar<_Tp> >; - -public: -#if defined(_LIBCPP_WIN32API) - typedef wchar_t value_type; - static constexpr value_type preferred_separator = L'\\'; -#else - typedef char value_type; - static constexpr value_type preferred_separator = '/'; -#endif - typedef basic_string string_type; - typedef basic_string_view __string_view; - - enum _LIBCPP_ENUM_VIS format : unsigned char { - auto_format, - native_format, - generic_format - }; - - // constructors and destructor - _LIBCPP_INLINE_VISIBILITY path() noexcept {} - _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {} - _LIBCPP_INLINE_VISIBILITY path(path&& __p) noexcept - : __pn_(_VSTD::move(__p.__pn_)) {} - - _LIBCPP_INLINE_VISIBILITY - path(string_type&& __s, format = format::auto_format) noexcept - : __pn_(_VSTD::move(__s)) {} - - template > - path(const _Source& __src, format = format::auto_format) { - _SourceCVT<_Source>::__append_source(__pn_, __src); - } - - template - path(_InputIt __first, _InputIt __last, format = 
format::auto_format) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - } - -/* -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - // TODO Implement locale conversions. - template > - path(const _Source& __src, const locale& __loc, format = format::auto_format); - template - path(_InputIt __first, _InputIt _last, const locale& __loc, - format = format::auto_format); -#endif -*/ - - _LIBCPP_INLINE_VISIBILITY - ~path() = default; - - // assignments - _LIBCPP_INLINE_VISIBILITY - path& operator=(const path& __p) { - __pn_ = __p.__pn_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator=(path&& __p) noexcept { - __pn_ = _VSTD::move(__p.__pn_); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator=(string_type&& __s) noexcept { - __pn_ = _VSTD::move(__s); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& assign(string_type&& __s) noexcept { - __pn_ = _VSTD::move(__s); - return *this; - } - - template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> - operator=(const _Source& __src) { - return this->assign(__src); - } - - template - _EnableIfPathable<_Source> assign(const _Source& __src) { - __pn_.clear(); - _SourceCVT<_Source>::__append_source(__pn_, __src); - return *this; - } - - template - path& assign(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - __pn_.clear(); - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - return *this; - } - -public: - // appends -#if defined(_LIBCPP_WIN32API) - path& operator/=(const path& __p) { - auto __p_root_name = __p.__root_name(); - auto __p_root_name_size = __p_root_name.size(); - if (__p.is_absolute() || - (!__p_root_name.empty() && __p_root_name != __string_view(root_name().__pn_))) { - __pn_ = __p.__pn_; - return *this; - } - if (__p.has_root_directory()) { - path __root_name_str = root_name(); - __pn_ = __root_name_str.native(); - __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); - return *this; - } - if (has_filename() || (!has_root_directory() && is_absolute())) - __pn_ += preferred_separator; - __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); - return *this; - } - template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> - operator/=(const _Source& __src) { - return operator/=(path(__src)); - } - - template - _EnableIfPathable<_Source> append(const _Source& __src) { - return operator/=(path(__src)); - } - - template - path& append(_InputIt __first, _InputIt __last) { - return operator/=(path(__first, __last)); - } -#else - path& operator/=(const path& __p) { - if (__p.is_absolute()) { - __pn_ = __p.__pn_; - return *this; - } - if (has_filename()) - __pn_ += preferred_separator; - __pn_ += __p.native(); - return *this; - } - - // FIXME: Use _LIBCPP_DIAGNOSE_WARNING to produce a diagnostic when __src - // is known at compile time to be "/' since the user almost certainly intended - // to append a separator instead of overwriting the path with "/" - template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> - operator/=(const _Source& __src) { - return this->append(__src); - } - - template - _EnableIfPathable<_Source> append(const _Source& __src) { - using _Traits = __is_pathable<_Source>; - using _CVT = _PathCVT<_SourceChar<_Source> >; - bool __source_is_absolute = __is_separator(_Traits::__first_or_null(__src)); - if (__source_is_absolute) - __pn_.clear(); - else if (has_filename()) - __pn_ += preferred_separator; - 
_CVT::__append_source(__pn_, __src); - return *this; - } - - template - path& append(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - static_assert(__can_convert_char<_ItVal>::value, "Must convertible"); - using _CVT = _PathCVT<_ItVal>; - if (__first != __last && __is_separator(*__first)) - __pn_.clear(); - else if (has_filename()) - __pn_ += preferred_separator; - _CVT::__append_range(__pn_, __first, __last); - return *this; - } -#endif - - // concatenation - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const path& __x) { - __pn_ += __x.__pn_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const string_type& __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(__string_view __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const value_type* __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(value_type __x) { - __pn_ += __x; - return *this; - } - - template - typename enable_if<__can_convert_char<_ECharT>::value, path&>::type - operator+=(_ECharT __x) { - _PathCVT<_ECharT>::__append_source(__pn_, - basic_string_view<_ECharT>(&__x, 1)); - return *this; - } - - template - _EnableIfPathable<_Source> operator+=(const _Source& __x) { - return this->concat(__x); - } - - template - _EnableIfPathable<_Source> concat(const _Source& __x) { - _SourceCVT<_Source>::__append_source(__pn_, __x); - return *this; - } - - template - path& concat(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - return *this; - } - - // modifiers - _LIBCPP_INLINE_VISIBILITY - void clear() noexcept { __pn_.clear(); } - - path& make_preferred() { -#if defined(_LIBCPP_WIN32API) - _VSTD::replace(__pn_.begin(), __pn_.end(), L'/', L'\\'); -#endif - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& remove_filename() { - auto __fname = __filename(); - if (!__fname.empty()) - __pn_.erase(__fname.data() - __pn_.data()); - return *this; - } - - path& replace_filename(const path& __replacement) { - remove_filename(); - return (*this /= __replacement); - } - - path& replace_extension(const path& __replacement = path()); - - _LIBCPP_INLINE_VISIBILITY - void swap(path& __rhs) noexcept { __pn_.swap(__rhs.__pn_); } - - // private helper to allow reserving memory in the path - _LIBCPP_INLINE_VISIBILITY - void __reserve(size_t __s) { __pn_.reserve(__s); } - - // native format observers - _LIBCPP_INLINE_VISIBILITY - const string_type& native() const noexcept { return __pn_; } - - _LIBCPP_INLINE_VISIBILITY - const value_type* c_str() const noexcept { return __pn_.c_str(); } - - _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; } - -#if defined(_LIBCPP_WIN32API) - _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { return __pn_; } - - _VSTD::wstring generic_wstring() const { - _VSTD::wstring __s; - __s.resize(__pn_.size()); - _VSTD::replace_copy(__pn_.begin(), __pn_.end(), __s.begin(), '\\', '/'); - return __s; - } - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - string(const _Allocator& __a = _Allocator()) const { - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s(__a); - __s.reserve(__pn_.size()); - _PathExport<_ECharT>::__append(__s, __pn_); - return __s; - } - - _LIBCPP_INLINE_VISIBILITY 
_VSTD::string string() const { - return string(); - } - _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const { - using _CVT = __narrow_to_utf8; - __u8_string __s; - __s.reserve(__pn_.size()); - _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); - return __s; - } - - _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { - return string(); - } - _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { - return string(); - } - - // generic format observers - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - generic_string(const _Allocator& __a = _Allocator()) const { - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s = string<_ECharT, _Traits, _Allocator>(__a); - // Note: This (and generic_u8string below) is slightly suboptimal as - // it iterates twice over the string; once to convert it to the right - // character type, and once to replace path delimiters. - _VSTD::replace(__s.begin(), __s.end(), - static_cast<_ECharT>('\\'), static_cast<_ECharT>('/')); - return __s; - } - - _VSTD::string generic_string() const { return generic_string(); } - _VSTD::u16string generic_u16string() const { return generic_string(); } - _VSTD::u32string generic_u32string() const { return generic_string(); } - __u8_string generic_u8string() const { - __u8_string __s = u8string(); - _VSTD::replace(__s.begin(), __s.end(), '\\', '/'); - return __s; - } -#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ -#else /* _LIBCPP_WIN32API */ - - _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { return __pn_; } -#ifndef _LIBCPP_HAS_NO_CHAR8_T - _LIBCPP_INLINE_VISIBILITY _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } -#else - _LIBCPP_INLINE_VISIBILITY _VSTD::string u8string() const { return __pn_; } -#endif - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - string(const _Allocator& __a = _Allocator()) const { - using _CVT = __widen_from_utf8; - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s(__a); - __s.reserve(__pn_.size()); - _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); - return __s; - } - -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS - _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { - return string(); - } -#endif - _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { - return string(); - } - _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { - return string(); - } -#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ - - // generic format observers - _VSTD::string generic_string() const { return __pn_; } -#ifndef _LIBCPP_HAS_NO_CHAR8_T - _VSTD::u8string generic_u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } -#else - _VSTD::string generic_u8string() const { return __pn_; } -#endif - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - generic_string(const _Allocator& __a = _Allocator()) const { - return string<_ECharT, _Traits, _Allocator>(__a); - } - -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS - _VSTD::wstring generic_wstring() const { return string(); } -#endif - _VSTD::u16string generic_u16string() const { return string(); } - _VSTD::u32string generic_u32string() const { return string(); } -#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ -#endif /* !_LIBCPP_WIN32API */ - -private: - int __compare(__string_view) const; 
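[Editor's note, not part of the patch] A brief sketch of the observers shown above: native() hands back the stored string in the platform's preferred format by reference, while the generic_* observers always use '/' as the separator.

#include <filesystem>
#include <string>

namespace fs = std::filesystem;

void observers(const fs::path& p) {
  const fs::path::string_type& raw = p.native();  // no conversion, by reference
  std::string n = p.string();                     // native format, char encoding
  std::string g = p.generic_string();             // generic format, '/' separators
  (void)raw; (void)n; (void)g;
}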
- __string_view __root_name() const; - __string_view __root_directory() const; - __string_view __root_path_raw() const; - __string_view __relative_path() const; - __string_view __parent_path() const; - __string_view __filename() const; - __string_view __stem() const; - __string_view __extension() const; - -public: - // compare - _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const noexcept { - return __compare(__p.__pn_); - } - _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const { - return __compare(__s); - } - _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const { - return __compare(__s); - } - _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const { - return __compare(__s); - } - - // decomposition - _LIBCPP_INLINE_VISIBILITY path root_name() const { - return string_type(__root_name()); - } - _LIBCPP_INLINE_VISIBILITY path root_directory() const { - return string_type(__root_directory()); - } - _LIBCPP_INLINE_VISIBILITY path root_path() const { -#if defined(_LIBCPP_WIN32API) - return string_type(__root_path_raw()); -#else - return root_name().append(string_type(__root_directory())); -#endif - } - _LIBCPP_INLINE_VISIBILITY path relative_path() const { - return string_type(__relative_path()); - } - _LIBCPP_INLINE_VISIBILITY path parent_path() const { - return string_type(__parent_path()); - } - _LIBCPP_INLINE_VISIBILITY path filename() const { - return string_type(__filename()); - } - _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem()); } - _LIBCPP_INLINE_VISIBILITY path extension() const { - return string_type(__extension()); - } - - // query - _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool - empty() const noexcept { - return __pn_.empty(); - } - - _LIBCPP_INLINE_VISIBILITY bool has_root_name() const { - return !__root_name().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const { - return !__root_directory().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_root_path() const { - return !__root_path_raw().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const { - return !__relative_path().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_parent_path() const { - return !__parent_path().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_filename() const { - return !__filename().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_extension() const { - return !__extension().empty(); - } - - _LIBCPP_INLINE_VISIBILITY bool is_absolute() const { -#if defined(_LIBCPP_WIN32API) - __string_view __root_name_str = __root_name(); - __string_view __root_dir = __root_directory(); - if (__root_name_str.size() == 2 && __root_name_str[1] == ':') { - // A drive letter with no root directory is relative, e.g. x:example. - return !__root_dir.empty(); - } - // If no root name, it's relative, e.g. 
\example is relative to the current drive - if (__root_name_str.empty()) - return false; - if (__root_name_str.size() < 3) - return false; - // A server root name, like \\server, is always absolute - if (__root_name_str[0] != '/' && __root_name_str[0] != '\\') - return false; - if (__root_name_str[1] != '/' && __root_name_str[1] != '\\') - return false; - // Seems to be a server root name - return true; -#else - return has_root_directory(); -#endif - } - _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); } - - // relative paths - path lexically_normal() const; - path lexically_relative(const path& __base) const; - - _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const { - path __result = this->lexically_relative(__base); - if (__result.native().empty()) - return *this; - return __result; - } - - // iterators - class _LIBCPP_TYPE_VIS iterator; - typedef iterator const_iterator; - - iterator begin() const; - iterator end() const; - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template - _LIBCPP_INLINE_VISIBILITY friend - typename enable_if::value && - is_same<_Traits, char_traits >::value, - basic_ostream<_CharT, _Traits>&>::type - operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { - __os << _VSTD::__quoted(__p.native()); - return __os; - } - - template - _LIBCPP_INLINE_VISIBILITY friend - typename enable_if::value || - !is_same<_Traits, char_traits >::value, - basic_ostream<_CharT, _Traits>&>::type - operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { - __os << _VSTD::__quoted(__p.string<_CharT, _Traits>()); - return __os; - } - - template - _LIBCPP_INLINE_VISIBILITY friend basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) { - basic_string<_CharT, _Traits> __tmp; - __is >> __quoted(__tmp); - __p = __tmp; - return __is; - } -#endif // !_LIBCPP_HAS_NO_LOCALIZATION - - friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) == 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) != 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) < 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) <= 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) > 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) >= 0; - } - - friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs, - const path& __rhs) { - path __result(__lhs); - __result /= __rhs; - return __result; - } -private: - inline _LIBCPP_INLINE_VISIBILITY path& - __assign_view(__string_view const& __s) noexcept { - __pn_ = string_type(__s); - return *this; - } - string_type __pn_; -}; - -inline _LIBCPP_INLINE_VISIBILITY void swap(path& __lhs, path& __rhs) noexcept { - __lhs.swap(__rhs); -} - -_LIBCPP_FUNC_VIS -size_t hash_value(const path& __p) noexcept; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T - typename enable_if<__is_pathable<_InputIt>::value, path>::type - u8path(_InputIt __f, _InputIt __l) { - static_assert( -#ifndef _LIBCPP_HAS_NO_CHAR8_T - 
is_same::__char_type, char8_t>::value || -#endif - is_same::__char_type, char>::value, - "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" - " or 'char8_t'"); -#if defined(_LIBCPP_WIN32API) - string __tmp(__f, __l); - using _CVT = __widen_from_utf8; - _VSTD::wstring __w; - __w.reserve(__tmp.size()); - _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); - return path(__w); -#else - return path(__f, __l); -#endif /* !_LIBCPP_WIN32API */ -} - -#if defined(_LIBCPP_WIN32API) -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T - typename enable_if<__is_pathable<_InputIt>::value, path>::type - u8path(_InputIt __f, _NullSentinel) { - static_assert( -#ifndef _LIBCPP_HAS_NO_CHAR8_T - is_same::__char_type, char8_t>::value || -#endif - is_same::__char_type, char>::value, - "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" - " or 'char8_t'"); - string __tmp; - const char __sentinel = char{}; - for (; *__f != __sentinel; ++__f) - __tmp.push_back(*__f); - using _CVT = __widen_from_utf8; - _VSTD::wstring __w; - __w.reserve(__tmp.size()); - _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); - return path(__w); -} -#endif /* _LIBCPP_WIN32API */ - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T - typename enable_if<__is_pathable<_Source>::value, path>::type - u8path(const _Source& __s) { - static_assert( -#ifndef _LIBCPP_HAS_NO_CHAR8_T - is_same::__char_type, char8_t>::value || -#endif - is_same::__char_type, char>::value, - "u8path(Source const&) requires Source have a character type of type " - "'char' or 'char8_t'"); -#if defined(_LIBCPP_WIN32API) - using _Traits = __is_pathable<_Source>; - return u8path(_VSTD::__unwrap_iter(_Traits::__range_begin(__s)), _VSTD::__unwrap_iter(_Traits::__range_end(__s))); -#else - return path(__s); -#endif -} - -class _LIBCPP_TYPE_VIS path::iterator { -public: - enum _ParserState : unsigned char { - _Singular, - _BeforeBegin, - _InRootName, - _InRootDir, - _InFilenames, - _InTrailingSep, - _AtEnd - }; - -public: - typedef bidirectional_iterator_tag iterator_category; - - typedef path value_type; - typedef ptrdiff_t difference_type; - typedef const path* pointer; - typedef const path& reference; - - typedef void - __stashing_iterator_tag; // See reverse_iterator and __is_stashing_iterator - -public: - _LIBCPP_INLINE_VISIBILITY - iterator() - : __stashed_elem_(), __path_ptr_(nullptr), __entry_(), - __state_(_Singular) {} - - iterator(const iterator&) = default; - ~iterator() = default; - - iterator& operator=(const iterator&) = default; - - _LIBCPP_INLINE_VISIBILITY - reference operator*() const { return __stashed_elem_; } - - _LIBCPP_INLINE_VISIBILITY - pointer operator->() const { return &__stashed_elem_; } - - _LIBCPP_INLINE_VISIBILITY - iterator& operator++() { - _LIBCPP_ASSERT(__state_ != _Singular, - "attempting to increment a singular iterator"); - _LIBCPP_ASSERT(__state_ != _AtEnd, - "attempting to increment the end iterator"); - return __increment(); - } - - _LIBCPP_INLINE_VISIBILITY - iterator operator++(int) { - iterator __it(*this); - this->operator++(); - return __it; - } - - _LIBCPP_INLINE_VISIBILITY - iterator& operator--() { - _LIBCPP_ASSERT(__state_ != _Singular, - "attempting to decrement a singular iterator"); - _LIBCPP_ASSERT(__entry_.data() != __path_ptr_->native().data(), - "attempting to decrement the begin iterator"); - return __decrement(); - } - - _LIBCPP_INLINE_VISIBILITY - iterator operator--(int) { - iterator __it(*this); - 
this->operator--(); - return __it; - } - -private: - friend class path; - - inline _LIBCPP_INLINE_VISIBILITY friend bool operator==(const iterator&, - const iterator&); - - iterator& __increment(); - iterator& __decrement(); - - path __stashed_elem_; - const path* __path_ptr_; - path::__string_view __entry_; - _ParserState __state_; -}; - -inline _LIBCPP_INLINE_VISIBILITY bool operator==(const path::iterator& __lhs, - const path::iterator& __rhs) { - return __lhs.__path_ptr_ == __rhs.__path_ptr_ && - __lhs.__entry_.data() == __rhs.__entry_.data(); -} - -inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const path::iterator& __lhs, - const path::iterator& __rhs) { - return !(__lhs == __rhs); -} - -// TODO(ldionne): We need to pop the pragma and push it again after -// filesystem_error to work around PR41078. -_LIBCPP_AVAILABILITY_FILESYSTEM_POP - -class _LIBCPP_AVAILABILITY_FILESYSTEM _LIBCPP_EXCEPTION_ABI filesystem_error : public system_error { -public: - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, error_code __ec) - : system_error(__ec, __what), - __storage_(make_shared<_Storage>(path(), path())) { - __create_what(0); - } - - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, const path& __p1, error_code __ec) - : system_error(__ec, __what), - __storage_(make_shared<_Storage>(__p1, path())) { - __create_what(1); - } - - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, const path& __p1, const path& __p2, - error_code __ec) - : system_error(__ec, __what), - __storage_(make_shared<_Storage>(__p1, __p2)) { - __create_what(2); - } - - _LIBCPP_INLINE_VISIBILITY - const path& path1() const noexcept { return __storage_->__p1_; } - - _LIBCPP_INLINE_VISIBILITY - const path& path2() const noexcept { return __storage_->__p2_; } - - filesystem_error(const filesystem_error&) = default; - ~filesystem_error() override; // key function - - _LIBCPP_INLINE_VISIBILITY - const char* what() const noexcept override { - return __storage_->__what_.c_str(); - } - - void __create_what(int __num_paths); - -private: - struct _LIBCPP_HIDDEN _Storage { - _LIBCPP_INLINE_VISIBILITY - _Storage(const path& __p1, const path& __p2) : __p1_(__p1), __p2_(__p2) {} - - path __p1_; - path __p2_; - string __what_; - }; - shared_ptr<_Storage> __storage_; -}; - -_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH - -template -_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY -#ifndef _LIBCPP_NO_EXCEPTIONS -void __throw_filesystem_error(_Args&&... __args) { - throw filesystem_error(_VSTD::forward<_Args>(__args)...); -} -#else -void __throw_filesystem_error(_Args&&...) 
{ - _VSTD::abort(); -} -#endif - -// operational functions - -_LIBCPP_FUNC_VIS -path __absolute(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __canonical(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __copy(const path& __from, const path& __to, copy_options __opt, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __copy_file(const path& __from, const path& __to, copy_options __opt, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __copy_symlink(const path& __existing_symlink, const path& __new_symlink, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __create_directories(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -bool __create_directory(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -bool __create_directory(const path& p, const path& attributes, - error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __create_directory_symlink(const path& __to, const path& __new_symlink, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __create_hard_link(const path& __to, const path& __new_hard_link, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __create_symlink(const path& __to, const path& __new_symlink, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __current_path(error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __current_path(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __equivalent(const path&, const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __file_size(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __hard_link_count(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __fs_is_empty(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -file_time_type __last_write_time(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __last_write_time(const path& p, file_time_type new_time, - error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __permissions(const path&, perms, perm_options, error_code* = nullptr); -_LIBCPP_FUNC_VIS -path __read_symlink(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -bool __remove(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __remove_all(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __rename(const path& from, const path& to, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __resize_file(const path& p, uintmax_t size, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -space_info __space(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -file_status __status(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -file_status __symlink_status(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __system_complete(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __temp_directory_path(error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __weakly_canonical(path const& __p, error_code* __ec = nullptr); - -inline _LIBCPP_INLINE_VISIBILITY path current_path() { - return __current_path(); -} - -inline _LIBCPP_INLINE_VISIBILITY path current_path(error_code& __ec) { - return __current_path(&__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p) { - __current_path(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p, - error_code& __ec) noexcept { - __current_path(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p) { - return __absolute(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& 
__p, - error_code& __ec) { - return __absolute(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p) { - return __canonical(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p, - error_code& __ec) { - return __canonical(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, - const path& __to) { - __copy(__from, __to, copy_options::none); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, - error_code& __ec) { - __copy(__from, __to, copy_options::none, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, - copy_options __opt) { - __copy(__from, __to, __opt); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, - copy_options __opt, - error_code& __ec) { - __copy(__from, __to, __opt, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, - const path& __to) { - return __copy_file(__from, __to, copy_options::none); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -copy_file(const path& __from, const path& __to, error_code& __ec) { - return __copy_file(__from, __to, copy_options::none, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -copy_file(const path& __from, const path& __to, copy_options __opt) { - return __copy_file(__from, __to, __opt); -} - -inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, - const path& __to, - copy_options __opt, - error_code& __ec) { - return __copy_file(__from, __to, __opt, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy_symlink(const path& __existing, - const path& __new) { - __copy_symlink(__existing, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void -copy_symlink(const path& __ext, const path& __new, error_code& __ec) noexcept { - __copy_symlink(__ext, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p) { - return __create_directories(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p, - error_code& __ec) { - return __create_directories(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p) { - return __create_directory(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -create_directory(const path& __p, error_code& __ec) noexcept { - return __create_directory(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p, - const path& __attrs) { - return __create_directory(__p, __attrs); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -create_directory(const path& __p, const path& __attrs, - error_code& __ec) noexcept { - return __create_directory(__p, __attrs, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void -create_directory_symlink(const path& __to, const path& __new) { - __create_directory_symlink(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void -create_directory_symlink(const path& __to, const path& __new, - error_code& __ec) noexcept { - __create_directory_symlink(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void create_hard_link(const path& __to, - const path& __new) { - __create_hard_link(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void -create_hard_link(const path& __to, const path& __new, - error_code& __ec) noexcept { - __create_hard_link(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void create_symlink(const path& __to, - const path& __new) { - __create_symlink(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void 
-create_symlink(const path& __to, const path& __new, error_code& __ec) noexcept { - return __create_symlink(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool status_known(file_status __s) noexcept { - return __s.type() != file_type::none; -} - -inline _LIBCPP_INLINE_VISIBILITY bool exists(file_status __s) noexcept { - return status_known(__s) && __s.type() != file_type::not_found; -} - -inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p) { - return exists(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p, - error_code& __ec) noexcept { - auto __s = __status(__p, &__ec); - if (status_known(__s)) - __ec.clear(); - return exists(__s); -} - -inline _LIBCPP_INLINE_VISIBILITY bool equivalent(const path& __p1, - const path& __p2) { - return __equivalent(__p1, __p2); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { - return __equivalent(__p1, __p2, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t file_size(const path& __p) { - return __file_size(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t -file_size(const path& __p, error_code& __ec) noexcept { - return __file_size(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t hard_link_count(const path& __p) { - return __hard_link_count(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t -hard_link_count(const path& __p, error_code& __ec) noexcept { - return __hard_link_count(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(file_status __s) noexcept { - return __s.type() == file_type::block; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p) { - return is_block_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p, - error_code& __ec) noexcept { - return is_block_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_character_file(file_status __s) noexcept { - return __s.type() == file_type::character; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_character_file(const path& __p) { - return is_character_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_character_file(const path& __p, error_code& __ec) noexcept { - return is_character_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_directory(file_status __s) noexcept { - return __s.type() == file_type::directory; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p) { - return is_directory(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p, - error_code& __ec) noexcept { - return is_directory(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p) { - return __fs_is_empty(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p, - error_code& __ec) { - return __fs_is_empty(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(file_status __s) noexcept { - return __s.type() == file_type::fifo; -} -inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p) { - return is_fifo(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p, - error_code& __ec) noexcept { - return is_fifo(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_regular_file(file_status __s) noexcept { - return __s.type() == file_type::regular; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_regular_file(const path& __p) { - return 
is_regular_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_regular_file(const path& __p, error_code& __ec) noexcept { - return is_regular_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_socket(file_status __s) noexcept { - return __s.type() == file_type::socket; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p) { - return is_socket(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p, - error_code& __ec) noexcept { - return is_socket(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(file_status __s) noexcept { - return __s.type() == file_type::symlink; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p) { - return is_symlink(__symlink_status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p, - error_code& __ec) noexcept { - return is_symlink(__symlink_status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_other(file_status __s) noexcept { - return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && - !is_symlink(__s); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p) { - return is_other(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p, - error_code& __ec) noexcept { - return is_other(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY file_time_type -last_write_time(const path& __p) { - return __last_write_time(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY file_time_type -last_write_time(const path& __p, error_code& __ec) noexcept { - return __last_write_time(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void last_write_time(const path& __p, - file_time_type __t) { - __last_write_time(__p, __t); -} - -inline _LIBCPP_INLINE_VISIBILITY void -last_write_time(const path& __p, file_time_type __t, - error_code& __ec) noexcept { - __last_write_time(__p, __t, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void -permissions(const path& __p, perms __prms, - perm_options __opts = perm_options::replace) { - __permissions(__p, __prms, __opts); -} - -inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, - error_code& __ec) noexcept { - __permissions(__p, __prms, perm_options::replace, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, - perm_options __opts, - error_code& __ec) { - __permissions(__p, __prms, __opts, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, - const path& __base, - error_code& __ec) { - path __tmp = __weakly_canonical(__p, &__ec); - if (__ec) - return {}; - path __tmp_base = __weakly_canonical(__base, &__ec); - if (__ec) - return {}; - return __tmp.lexically_proximate(__tmp_base); -} - -inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, - error_code& __ec) { - return proximate(__p, current_path(), __ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path -proximate(const path& __p, const path& __base = current_path()) { - return __weakly_canonical(__p).lexically_proximate( - __weakly_canonical(__base)); -} - -inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p) { - return __read_symlink(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p, - error_code& __ec) { - return __read_symlink(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, - const path& __base, - error_code& __ec) { - path __tmp = __weakly_canonical(__p, &__ec); - if (__ec) - 
return path(); - path __tmpbase = __weakly_canonical(__base, &__ec); - if (__ec) - return path(); - return __tmp.lexically_relative(__tmpbase); -} - -inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, - error_code& __ec) { - return relative(__p, current_path(), __ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path -relative(const path& __p, const path& __base = current_path()) { - return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p) { - return __remove(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p, - error_code& __ec) noexcept { - return __remove(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p) { - return __remove_all(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p, - error_code& __ec) { - return __remove_all(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void rename(const path& __from, - const path& __to) { - return __rename(__from, __to); -} - -inline _LIBCPP_INLINE_VISIBILITY void -rename(const path& __from, const path& __to, error_code& __ec) noexcept { - return __rename(__from, __to, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void resize_file(const path& __p, - uintmax_t __ns) { - return __resize_file(__p, __ns); -} - -inline _LIBCPP_INLINE_VISIBILITY void -resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { - return __resize_file(__p, __ns, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p) { - return __space(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p, - error_code& __ec) noexcept { - return __space(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p) { - return __status(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p, - error_code& __ec) noexcept { - return __status(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status symlink_status(const path& __p) { - return __symlink_status(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status -symlink_status(const path& __p, error_code& __ec) noexcept { - return __symlink_status(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path() { - return __temp_directory_path(); -} - -inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path(error_code& __ec) { - return __temp_directory_path(&__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p) { - return __weakly_canonical(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p, - error_code& __ec) { - return __weakly_canonical(__p, &__ec); -} - -class directory_iterator; -class recursive_directory_iterator; -class _LIBCPP_HIDDEN __dir_stream; - -class directory_entry { - typedef _VSTD_FS::path _Path; - -public: - // constructors and destructors - directory_entry() noexcept = default; - directory_entry(directory_entry const&) = default; - directory_entry(directory_entry&&) noexcept = default; - - _LIBCPP_INLINE_VISIBILITY - explicit directory_entry(_Path const& __p) : __p_(__p) { - error_code __ec; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - directory_entry(_Path const& __p, error_code& __ec) : __p_(__p) { - __refresh(&__ec); - } - - ~directory_entry() {} - - directory_entry& operator=(directory_entry const&) = default; - directory_entry& operator=(directory_entry&&) noexcept = default; - - _LIBCPP_INLINE_VISIBILITY - void assign(_Path 
const& __p) { - __p_ = __p; - error_code __ec; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void assign(_Path const& __p, error_code& __ec) { - __p_ = __p; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void replace_filename(_Path const& __p) { - __p_.replace_filename(__p); - error_code __ec; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void replace_filename(_Path const& __p, error_code& __ec) { - __p_ = __p_.parent_path() / __p; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void refresh() { __refresh(); } - - _LIBCPP_INLINE_VISIBILITY - void refresh(error_code& __ec) noexcept { __refresh(&__ec); } - - _LIBCPP_INLINE_VISIBILITY - _Path const& path() const noexcept { return __p_; } - - _LIBCPP_INLINE_VISIBILITY - operator const _Path&() const noexcept { return __p_; } - - _LIBCPP_INLINE_VISIBILITY - bool exists() const { return _VSTD_FS::exists(file_status{__get_ft()}); } - - _LIBCPP_INLINE_VISIBILITY - bool exists(error_code& __ec) const noexcept { - return _VSTD_FS::exists(file_status{__get_ft(&__ec)}); - } - - _LIBCPP_INLINE_VISIBILITY - bool is_block_file() const { return __get_ft() == file_type::block; } - - _LIBCPP_INLINE_VISIBILITY - bool is_block_file(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::block; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_character_file() const { return __get_ft() == file_type::character; } - - _LIBCPP_INLINE_VISIBILITY - bool is_character_file(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::character; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_directory() const { return __get_ft() == file_type::directory; } - - _LIBCPP_INLINE_VISIBILITY - bool is_directory(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::directory; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_fifo() const { return __get_ft() == file_type::fifo; } - - _LIBCPP_INLINE_VISIBILITY - bool is_fifo(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::fifo; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_other() const { return _VSTD_FS::is_other(file_status{__get_ft()}); } - - _LIBCPP_INLINE_VISIBILITY - bool is_other(error_code& __ec) const noexcept { - return _VSTD_FS::is_other(file_status{__get_ft(&__ec)}); - } - - _LIBCPP_INLINE_VISIBILITY - bool is_regular_file() const { return __get_ft() == file_type::regular; } - - _LIBCPP_INLINE_VISIBILITY - bool is_regular_file(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::regular; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_socket() const { return __get_ft() == file_type::socket; } - - _LIBCPP_INLINE_VISIBILITY - bool is_socket(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::socket; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } - - _LIBCPP_INLINE_VISIBILITY - bool is_symlink(error_code& __ec) const noexcept { - return __get_sym_ft(&__ec) == file_type::symlink; - } - _LIBCPP_INLINE_VISIBILITY - uintmax_t file_size() const { return __get_size(); } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t file_size(error_code& __ec) const noexcept { - return __get_size(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t hard_link_count() const { return __get_nlink(); } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t hard_link_count(error_code& __ec) const noexcept { - return __get_nlink(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_time_type last_write_time() const { return __get_write_time(); } - - _LIBCPP_INLINE_VISIBILITY - 
file_time_type last_write_time(error_code& __ec) const noexcept { - return __get_write_time(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_status status() const { return __get_status(); } - - _LIBCPP_INLINE_VISIBILITY - file_status status(error_code& __ec) const noexcept { - return __get_status(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_status symlink_status() const { return __get_symlink_status(); } - - _LIBCPP_INLINE_VISIBILITY - file_status symlink_status(error_code& __ec) const noexcept { - return __get_symlink_status(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - bool operator<(directory_entry const& __rhs) const noexcept { - return __p_ < __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator==(directory_entry const& __rhs) const noexcept { - return __p_ == __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator!=(directory_entry const& __rhs) const noexcept { - return __p_ != __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator<=(directory_entry const& __rhs) const noexcept { - return __p_ <= __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator>(directory_entry const& __rhs) const noexcept { - return __p_ > __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator>=(directory_entry const& __rhs) const noexcept { - return __p_ >= __rhs.__p_; - } - -private: - friend class directory_iterator; - friend class recursive_directory_iterator; - friend class __dir_stream; - - enum _CacheType : unsigned char { - _Empty, - _IterSymlink, - _IterNonSymlink, - _RefreshSymlink, - _RefreshSymlinkUnresolved, - _RefreshNonSymlink - }; - - struct __cached_data { - uintmax_t __size_; - uintmax_t __nlink_; - file_time_type __write_time_; - perms __sym_perms_; - perms __non_sym_perms_; - file_type __type_; - _CacheType __cache_type_; - - _LIBCPP_INLINE_VISIBILITY - __cached_data() noexcept { __reset(); } - - _LIBCPP_INLINE_VISIBILITY - void __reset() { - __cache_type_ = _Empty; - __type_ = file_type::none; - __sym_perms_ = __non_sym_perms_ = perms::unknown; - __size_ = __nlink_ = uintmax_t(-1); - __write_time_ = file_time_type::min(); - } - }; - - _LIBCPP_INLINE_VISIBILITY - static __cached_data __create_iter_result(file_type __ft) { - __cached_data __data; - __data.__type_ = __ft; - __data.__cache_type_ = [&]() { - switch (__ft) { - case file_type::none: - return _Empty; - case file_type::symlink: - return _IterSymlink; - default: - return _IterNonSymlink; - } - }(); - return __data; - } - - _LIBCPP_INLINE_VISIBILITY - void __assign_iter_entry(_Path&& __p, __cached_data __dt) { - __p_ = _VSTD::move(__p); - __data_ = __dt; - } - - _LIBCPP_FUNC_VIS - error_code __do_refresh() noexcept; - - _LIBCPP_INLINE_VISIBILITY - static bool __is_dne_error(error_code const& __ec) { - if (!__ec) - return true; - switch (static_cast(__ec.value())) { - case errc::no_such_file_or_directory: - case errc::not_a_directory: - return true; - default: - return false; - } - } - - _LIBCPP_INLINE_VISIBILITY - void __handle_error(const char* __msg, error_code* __dest_ec, - error_code const& __ec, bool __allow_dne = false) const { - if (__dest_ec) { - *__dest_ec = __ec; - return; - } - if (__ec && (!__allow_dne || !__is_dne_error(__ec))) - __throw_filesystem_error(__msg, __p_, __ec); - } - - _LIBCPP_INLINE_VISIBILITY - void __refresh(error_code* __ec = nullptr) { - __handle_error("in directory_entry::refresh", __ec, __do_refresh(), - /*allow_dne*/ true); - } - - _LIBCPP_INLINE_VISIBILITY - file_type __get_sym_ft(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case 
_Empty: - return __symlink_status(__p_, __ec).type(); - case _IterSymlink: - case _RefreshSymlink: - case _RefreshSymlinkUnresolved: - if (__ec) - __ec->clear(); - return file_type::symlink; - case _IterNonSymlink: - case _RefreshNonSymlink: - file_status __st(__data_.__type_); - if (__ec && !_VSTD_FS::exists(__st)) - *__ec = make_error_code(errc::no_such_file_or_directory); - else if (__ec) - __ec->clear(); - return __data_.__type_; - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_type __get_ft(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return __status(__p_, __ec).type(); - case _IterNonSymlink: - case _RefreshNonSymlink: - case _RefreshSymlink: { - file_status __st(__data_.__type_); - if (__ec && !_VSTD_FS::exists(__st)) - *__ec = make_error_code(errc::no_such_file_or_directory); - else if (__ec) - __ec->clear(); - return __data_.__type_; - } - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_status __get_status(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return __status(__p_, __ec); - case _RefreshNonSymlink: - case _RefreshSymlink: - return file_status(__get_ft(__ec), __data_.__non_sym_perms_); - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_status __get_symlink_status(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - return __symlink_status(__p_, __ec); - case _RefreshNonSymlink: - return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_); - case _RefreshSymlink: - case _RefreshSymlinkUnresolved: - return file_status(__get_sym_ft(__ec), __data_.__sym_perms_); - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t __get_size(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return _VSTD_FS::__file_size(__p_, __ec); - case _RefreshSymlink: - case _RefreshNonSymlink: { - error_code __m_ec; - file_status __st(__get_ft(&__m_ec)); - __handle_error("in directory_entry::file_size", __ec, __m_ec); - if (_VSTD_FS::exists(__st) && !_VSTD_FS::is_regular_file(__st)) { - errc __err_kind = _VSTD_FS::is_directory(__st) ? 
errc::is_a_directory - : errc::not_supported; - __handle_error("in directory_entry::file_size", __ec, - make_error_code(__err_kind)); - } - return __data_.__size_; - } - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t __get_nlink(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return _VSTD_FS::__hard_link_count(__p_, __ec); - case _RefreshSymlink: - case _RefreshNonSymlink: { - error_code __m_ec; - (void)__get_ft(&__m_ec); - __handle_error("in directory_entry::hard_link_count", __ec, __m_ec); - return __data_.__nlink_; - } - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_time_type __get_write_time(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return _VSTD_FS::__last_write_time(__p_, __ec); - case _RefreshSymlink: - case _RefreshNonSymlink: { - error_code __m_ec; - file_status __st(__get_ft(&__m_ec)); - __handle_error("in directory_entry::last_write_time", __ec, __m_ec); - if (_VSTD_FS::exists(__st) && - __data_.__write_time_ == file_time_type::min()) - __handle_error("in directory_entry::last_write_time", __ec, - make_error_code(errc::value_too_large)); - return __data_.__write_time_; - } - } - _LIBCPP_UNREACHABLE(); - } - -private: - _Path __p_; - __cached_data __data_; -}; - -class __dir_element_proxy { -public: - inline _LIBCPP_INLINE_VISIBILITY directory_entry operator*() { - return _VSTD::move(__elem_); - } - -private: - friend class directory_iterator; - friend class recursive_directory_iterator; - explicit __dir_element_proxy(directory_entry const& __e) : __elem_(__e) {} - __dir_element_proxy(__dir_element_proxy&& __o) - : __elem_(_VSTD::move(__o.__elem_)) {} - directory_entry __elem_; -}; - -class directory_iterator { -public: - typedef directory_entry value_type; - typedef ptrdiff_t difference_type; - typedef value_type const* pointer; - typedef value_type const& reference; - typedef input_iterator_tag iterator_category; - -public: - //ctor & dtor - directory_iterator() noexcept {} - - explicit directory_iterator(const path& __p) - : directory_iterator(__p, nullptr) {} - - directory_iterator(const path& __p, directory_options __opts) - : directory_iterator(__p, nullptr, __opts) {} - - directory_iterator(const path& __p, error_code& __ec) - : directory_iterator(__p, &__ec) {} - - directory_iterator(const path& __p, directory_options __opts, - error_code& __ec) - : directory_iterator(__p, &__ec, __opts) {} - - directory_iterator(const directory_iterator&) = default; - directory_iterator(directory_iterator&&) = default; - directory_iterator& operator=(const directory_iterator&) = default; - - directory_iterator& operator=(directory_iterator&& __o) noexcept { - // non-default implementation provided to support self-move assign. 
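Note (illustrative, not part of the patch): the removed directory_entry above keeps a __cached_data record filled in by refresh() or by the directory iterators, so observers such as is_regular_file() and file_size() can usually be answered from the cache and only fall back to __status()/__file_size() when the cache is empty. A small usage sketch; "example.txt" is a made-up file name.

```cpp
// Illustrative only; "example.txt" is a made-up file name.
#include <filesystem>
#include <iostream>
#include <system_error>

int main() {
  namespace fs = std::filesystem;
  std::error_code ec;

  // The (path, error_code) constructor calls __refresh(), populating the cache.
  fs::directory_entry entry{fs::path{"example.txt"}, ec};
  if (ec) {
    std::cout << "refresh failed: " << ec.message() << '\n';
    return 1;
  }

  // These queries can typically be served from the cached data without
  // further system calls.
  if (entry.is_regular_file(ec) && !ec)
    std::cout << entry.path() << " has size " << entry.file_size(ec) << '\n';
}
```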
- if (this != &__o) { - __imp_ = _VSTD::move(__o.__imp_); - } - return *this; - } - - ~directory_iterator() = default; - - const directory_entry& operator*() const { - _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); - return __dereference(); - } - - const directory_entry* operator->() const { return &**this; } - - directory_iterator& operator++() { return __increment(); } - - __dir_element_proxy operator++(int) { - __dir_element_proxy __p(**this); - __increment(); - return __p; - } - - directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } - -private: - inline _LIBCPP_INLINE_VISIBILITY friend bool - operator==(const directory_iterator& __lhs, - const directory_iterator& __rhs) noexcept; - - // construct the dir_stream - _LIBCPP_FUNC_VIS - directory_iterator(const path&, error_code*, - directory_options = directory_options::none); - - _LIBCPP_FUNC_VIS - directory_iterator& __increment(error_code* __ec = nullptr); - - _LIBCPP_FUNC_VIS - const directory_entry& __dereference() const; - -private: - shared_ptr<__dir_stream> __imp_; -}; - -inline _LIBCPP_INLINE_VISIBILITY bool -operator==(const directory_iterator& __lhs, - const directory_iterator& __rhs) noexcept { - return __lhs.__imp_ == __rhs.__imp_; -} - -inline _LIBCPP_INLINE_VISIBILITY bool -operator!=(const directory_iterator& __lhs, - const directory_iterator& __rhs) noexcept { - return !(__lhs == __rhs); -} - -// enable directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY directory_iterator -begin(directory_iterator __iter) noexcept { - return __iter; -} - -inline _LIBCPP_INLINE_VISIBILITY directory_iterator -end(directory_iterator) noexcept { - return directory_iterator(); -} - -class recursive_directory_iterator { -public: - using value_type = directory_entry; - using difference_type = ptrdiff_t; - using pointer = directory_entry const*; - using reference = directory_entry const&; - using iterator_category = input_iterator_tag; - -public: - // constructors and destructor - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator() noexcept : __rec_(false) {} - - _LIBCPP_INLINE_VISIBILITY - explicit recursive_directory_iterator( - const path& __p, directory_options __xoptions = directory_options::none) - : recursive_directory_iterator(__p, __xoptions, nullptr) {} - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator(const path& __p, directory_options __xoptions, - error_code& __ec) - : recursive_directory_iterator(__p, __xoptions, &__ec) {} - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator(const path& __p, error_code& __ec) - : recursive_directory_iterator(__p, directory_options::none, &__ec) {} - - recursive_directory_iterator(const recursive_directory_iterator&) = default; - recursive_directory_iterator(recursive_directory_iterator&&) = default; - - recursive_directory_iterator& - operator=(const recursive_directory_iterator&) = default; - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator& - operator=(recursive_directory_iterator&& __o) noexcept { - // non-default implementation provided to support self-move assign. 
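Note (illustrative, not part of the patch): the free begin()/end() overloads in the removed block above are what let directory_iterator participate in range-based for loops. Sketch below; "some_dir" is a made-up directory name.

```cpp
// Illustrative only; "some_dir" is a made-up directory name.
#include <filesystem>
#include <iostream>
#include <system_error>

int main() {
  namespace fs = std::filesystem;
  std::error_code ec;

  fs::directory_iterator it{"some_dir", ec};
  if (ec) {
    std::cout << "cannot open directory: " << ec.message() << '\n';
    return 1;
  }

  // begin(it)/end(it) are found by ADL, so the iterator works in range-for.
  for (const fs::directory_entry& entry : it)
    std::cout << entry.path() << '\n';
}
```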
- if (this != &__o) { - __imp_ = _VSTD::move(__o.__imp_); - __rec_ = __o.__rec_; - } - return *this; - } - - ~recursive_directory_iterator() = default; - - _LIBCPP_INLINE_VISIBILITY - const directory_entry& operator*() const { return __dereference(); } - - _LIBCPP_INLINE_VISIBILITY - const directory_entry* operator->() const { return &__dereference(); } - - recursive_directory_iterator& operator++() { return __increment(); } - - _LIBCPP_INLINE_VISIBILITY - __dir_element_proxy operator++(int) { - __dir_element_proxy __p(**this); - __increment(); - return __p; - } - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator& increment(error_code& __ec) { - return __increment(&__ec); - } - - _LIBCPP_FUNC_VIS directory_options options() const; - _LIBCPP_FUNC_VIS int depth() const; - - _LIBCPP_INLINE_VISIBILITY - void pop() { __pop(); } - - _LIBCPP_INLINE_VISIBILITY - void pop(error_code& __ec) { __pop(&__ec); } - - _LIBCPP_INLINE_VISIBILITY - bool recursion_pending() const { return __rec_; } - - _LIBCPP_INLINE_VISIBILITY - void disable_recursion_pending() { __rec_ = false; } - -private: - _LIBCPP_FUNC_VIS - recursive_directory_iterator(const path& __p, directory_options __opt, - error_code* __ec); - - _LIBCPP_FUNC_VIS - const directory_entry& __dereference() const; - - _LIBCPP_FUNC_VIS - bool __try_recursion(error_code* __ec); - - _LIBCPP_FUNC_VIS - void __advance(error_code* __ec = nullptr); - - _LIBCPP_FUNC_VIS - recursive_directory_iterator& __increment(error_code* __ec = nullptr); - - _LIBCPP_FUNC_VIS - void __pop(error_code* __ec = nullptr); - - inline _LIBCPP_INLINE_VISIBILITY friend bool - operator==(const recursive_directory_iterator&, - const recursive_directory_iterator&) noexcept; - - struct _LIBCPP_HIDDEN __shared_imp; - shared_ptr<__shared_imp> __imp_; - bool __rec_; -}; // class recursive_directory_iterator - -inline _LIBCPP_INLINE_VISIBILITY bool -operator==(const recursive_directory_iterator& __lhs, - const recursive_directory_iterator& __rhs) noexcept { - return __lhs.__imp_ == __rhs.__imp_; -} - -_LIBCPP_INLINE_VISIBILITY -inline bool operator!=(const recursive_directory_iterator& __lhs, - const recursive_directory_iterator& __rhs) noexcept { - return !(__lhs == __rhs); -} -// enable recursive_directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator -begin(recursive_directory_iterator __iter) noexcept { - return __iter; -} - -inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator -end(recursive_directory_iterator) noexcept { - return recursive_directory_iterator(); -} - -_LIBCPP_AVAILABILITY_FILESYSTEM_POP - -_LIBCPP_END_NAMESPACE_FILESYSTEM - -#if !defined(_LIBCPP_HAS_NO_RANGES) -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true; -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true; - -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true; -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true; -#endif - -#endif // !_LIBCPP_CXX03_LANG - -_LIBCPP_POP_MACROS - #endif // _LIBCPP_FILESYSTEM diff --git a/libcxx/include/future b/libcxx/include/future index 99df8831a778..6b666a70f48e 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -366,7 +366,7 @@ 
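Note (illustrative, not part of the patch): the removed recursive_directory_iterator exposes depth(), recursion_pending() and disable_recursion_pending(), which together allow pruning a traversal without giving up the iterator. Sketch below; "some_tree" and the ".git" filter are made up.

```cpp
// Illustrative only; "some_tree" and the ".git" filter are made up.
#include <cstddef>
#include <filesystem>
#include <iostream>
#include <string>
#include <system_error>

int main() {
  namespace fs = std::filesystem;
  std::error_code ec;

  fs::recursive_directory_iterator it{"some_tree", ec};
  if (ec)
    return 1;

  for (; it != fs::recursive_directory_iterator{}; it.increment(ec)) {
    if (ec)
      break;
    std::cout << std::string(static_cast<std::size_t>(it.depth()), ' ')
              << it->path().filename() << '\n';
    // Prune the walk: do not descend into directories named ".git".
    if (it->is_directory(ec) && it->path().filename() == ".git")
      it.disable_recursion_pending();
  }
}
```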
template struct uses_allocator, Alloc>; #include <__debug> #include <__memory/allocator_arg_t.h> #include <__memory/uses_allocator.h> -#include <__utility/decay_copy.h> +#include <__utility/auto_cast.h> #include <__utility/forward.h> #include #include @@ -2207,16 +2207,16 @@ async(launch __policy, _Fp&& __f, _Args&&... __args) { #endif if (__does_policy_contain(__policy, launch::async)) - return _VSTD::__make_async_assoc_state<_Rp>(_BF(_VSTD::__decay_copy(_VSTD::forward<_Fp>(__f)), - _VSTD::__decay_copy(_VSTD::forward<_Args>(__args))...)); + return _VSTD::__make_async_assoc_state<_Rp>(_BF(_LIBCPP_AUTO_CAST(_VSTD::forward<_Fp>(__f)), + _LIBCPP_AUTO_CAST(_VSTD::forward<_Args>(__args))...)); #ifndef _LIBCPP_NO_EXCEPTIONS } catch ( ... ) { if (__policy == launch::async) throw ; } #endif if (__does_policy_contain(__policy, launch::deferred)) - return _VSTD::__make_deferred_assoc_state<_Rp>(_BF(_VSTD::__decay_copy(_VSTD::forward<_Fp>(__f)), - _VSTD::__decay_copy(_VSTD::forward<_Args>(__args))...)); + return _VSTD::__make_deferred_assoc_state<_Rp>(_BF(_LIBCPP_AUTO_CAST(_VSTD::forward<_Fp>(__f)), + _LIBCPP_AUTO_CAST(_VSTD::forward<_Args>(__args))...)); return future<_Rp>{}; } diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index fa4170ba1ed3..1dc6db406a74 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -450,6 +450,25 @@ module std [system] { module filesystem { header "filesystem" export * + + module __filesystem { + module copy_options { private header "__filesystem/copy_options.h" } + module directory_entry { private header "__filesystem/directory_entry.h" } + module directory_iterator { private header "__filesystem/directory_iterator.h" } + module directory_options { private header "__filesystem/directory_options.h" } + module file_status { private header "__filesystem/file_status.h" } + module file_time_type { private header "__filesystem/file_time_type.h" } + module file_type { private header "__filesystem/file_type.h" } + module filesystem_error { private header "__filesystem/filesystem_error.h" } + module operations { private header "__filesystem/operations.h" } + module path_iterator { private header "__filesystem/path_iterator.h" } + module path { private header "__filesystem/path.h" } + module perm_options { private header "__filesystem/perm_options.h" } + module perms { private header "__filesystem/perms.h" } + module recursive_directory_iterator { private header "__filesystem/recursive_directory_iterator.h" } + module space_info { private header "__filesystem/space_info.h" } + module u8path { private header "__filesystem/u8path.h" } + } } module format { header "format" @@ -902,8 +921,8 @@ module std [system] { module __utility { module as_const { private header "__utility/as_const.h" } + module auto_cast { private header "__utility/auto_cast.h" } module cmp { private header "__utility/cmp.h" } - module decay_copy { private header "__utility/decay_copy.h" } module declval { private header "__utility/declval.h" } module exchange { private header "__utility/exchange.h" } module forward { private header "__utility/forward.h" } diff --git a/libcxx/include/thread b/libcxx/include/thread index bf751c87aa3b..a4632f6fe524 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -88,7 +88,6 @@ void sleep_for(const chrono::duration& rel_time); #include <__mutex_base> #include <__thread/poll_with_backoff.h> #include <__threading_support> -#include <__utility/decay_copy.h> #include <__utility/forward.h> #include #include 
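Note (illustrative, not part of the patch): the <future> hunk above replaces _VSTD::__decay_copy(...) with _LIBCPP_AUTO_CAST(...) when packaging the callable and arguments for async. Both spellings are meant to produce a decayed copy of each operand. The sketch below models that decay-copy behaviour with a stand-in DECAY_COPY macro; the real macro's definition lives in <__utility/auto_cast.h> and is not shown in this diff, so its exact spelling is an assumption here.

```cpp
// Illustrative only -- a sketch of decay-copy semantics, not libc++'s actual
// macro definition (that lives in <__utility/auto_cast.h>, outside this hunk).
#include <cassert>
#include <cstring>
#include <type_traits>

// Stand-in for how the helper is assumed to behave: materialise a prvalue of
// the decayed type of the operand.
#define DECAY_COPY(expr) static_cast<std::decay_t<decltype(expr)>>(expr)

int main() {
  const char text[] = "hello";

  // Arrays decay to pointers; references and top-level cv are stripped.
  static_assert(std::is_same_v<decltype(DECAY_COPY(text)), const char*>);

  int x = 42;
  int& rx = x;
  auto copy = DECAY_COPY(rx);   // an independent int, not a reference
  copy = 0;
  assert(x == 42);

  assert(std::strcmp(DECAY_COPY(text), "hello") == 0);
}
```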
@@ -303,8 +302,8 @@ thread::thread(_Fp&& __f, _Args&&... __args) typedef tuple<_TSPtr, typename decay<_Fp>::type, typename decay<_Args>::type...> _Gp; unique_ptr<_Gp> __p( new _Gp(_VSTD::move(__tsp), - _VSTD::__decay_copy(_VSTD::forward<_Fp>(__f)), - _VSTD::__decay_copy(_VSTD::forward<_Args>(__args))...)); + _VSTD::forward<_Fp>(__f), + _VSTD::forward<_Args>(__args)...)); int __ec = _VSTD::__libcpp_thread_create(&__t_, &__thread_proxy<_Gp>, __p.get()); if (__ec == 0) __p.release(); diff --git a/libcxx/include/utility b/libcxx/include/utility index 9ab7b8e0ffb3..9dd7905516a8 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -218,6 +218,7 @@ template #include <__debug> #include <__tuple> #include <__utility/as_const.h> +#include <__utility/auto_cast.h> #include <__utility/cmp.h> #include <__utility/declval.h> #include <__utility/exchange.h> diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp index 6219ceafd36e..90b255d9877f 100644 --- a/libcxx/src/filesystem/directory_iterator.cpp +++ b/libcxx/src/filesystem/directory_iterator.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "filesystem" #include "__config" +#include "filesystem" +#include "stack" #if defined(_LIBCPP_WIN32API) #define WIN32_LEAN_AND_MEAN #define NOMINMAX diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h index a2c340e61083..717894e537d2 100644 --- a/libcxx/src/filesystem/filesystem_common.h +++ b/libcxx/src/filesystem/filesystem_common.h @@ -17,6 +17,7 @@ #include "cstdlib" #include "ctime" #include "filesystem" +#include "system_error" #if !defined(_LIBCPP_WIN32API) # include diff --git a/libcxx/test/configs/apple-libc++-shared.cfg.in b/libcxx/test/configs/apple-libc++-shared.cfg.in index f47ffa88354f..c48c19134a2b 100644 --- a/libcxx/test/configs/apple-libc++-shared.cfg.in +++ b/libcxx/test/configs/apple-libc++-shared.cfg.in @@ -7,8 +7,6 @@ # We also don't use a per-target include directory layout, so we have only one # include directory for the libc++ headers. 
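Note (illustrative, not part of the patch): the <thread> hunk above drops __decay_copy around the forwarded callable and arguments. Because the tuple type _Gp already names decay<_Fp>::type and decay<_Args>::type... as its element types, forwarding straight into the tuple constructor performs the decay-copy anyway. The sketch below shows that equivalence on simplified types; make_thread_state is a made-up helper, not libc++ internals.

```cpp
// Illustrative only -- a simplified model of the <thread> change above.
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

template <class Fp, class... Args>
auto make_thread_state(Fp&& f, Args&&... args) {
  // Element types are decayed, mirroring the _Gp tuple in thread's constructor.
  using State = std::tuple<std::decay_t<Fp>, std::decay_t<Args>...>;
  // No explicit decay-copy helper needed: each forwarded reference constructs
  // a decayed element directly.
  return State(std::forward<Fp>(f), std::forward<Args>(args)...);
}

int main() {
  std::string name = "worker";
  auto state = make_thread_state([](const std::string& s) { return s.size(); },
                                 name);
  static_assert(std::is_same_v<
      std::tuple_element_t<1, decltype(state)>, std::string>);
  (void)state;
}
```

The stored element types are unchanged by the patch; the forwarded values are simply copied or moved into place directly rather than through an intermediate decay-copied temporary.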
-import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -21,7 +19,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env DYLD_LIBRARY_PATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env DYLD_LIBRARY_PATH=%{install}/lib -- ' )) import os, site diff --git a/libcxx/test/configs/cmake-bridge.cfg.in b/libcxx/test/configs/cmake-bridge.cfg.in index e60845e0beb3..b689827439c4 100644 --- a/libcxx/test/configs/cmake-bridge.cfg.in +++ b/libcxx/test/configs/cmake-bridge.cfg.in @@ -2,6 +2,8 @@ @SERIALIZED_LIT_PARAMS@ +import shlex + # # This file performs the bridge between the CMake configuration and the Lit # configuration files by setting up the LitConfig object and various Lit @@ -23,9 +25,10 @@ config.recursiveExpansionLimit = 10 config.test_exec_root = '@CMAKE_BINARY_DIR@' # Add substitutions for bootstrapping the test suite configuration -config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) +config.substitutions.append(('%{cxx}', shlex.quote('@CMAKE_CXX_COMPILER@'))) config.substitutions.append(('%{libcxx}', '@LIBCXX_SOURCE_DIR@')) config.substitutions.append(('%{install}', '@CMAKE_BINARY_DIR@')) config.substitutions.append(('%{include}', '%{install}/@LIBCXX_INSTALL_INCLUDE_DIR@')) config.substitutions.append(('%{target-include}', '%{install}/@LIBCXX_INSTALL_INCLUDE_TARGET_DIR@')) config.substitutions.append(('%{lib}', '%{install}/@LIBCXX_INSTALL_LIBRARY_DIR@')) +config.substitutions.append(('%{executor}', '@LIBCXX_EXECUTOR@')) diff --git a/libcxx/test/configs/ibm-libc++-shared.cfg.in b/libcxx/test/configs/ibm-libc++-shared.cfg.in index 4efdf423207e..c0675b83726a 100644 --- a/libcxx/test/configs/ibm-libc++-shared.cfg.in +++ b/libcxx/test/configs/ibm-libc++-shared.cfg.in @@ -2,8 +2,6 @@ # AIX using a shared library. # -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', '')) @@ -14,7 +12,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++ -lc++abi -latomic -Wl,-bbigtoc' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env LIBPATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env LIBPATH=%{install}/lib -- ' )) # LIBCXX-AIX-FIXME is the feature name used to XFAIL the diff --git a/libcxx/test/configs/llvm-libc++-mingw.cfg.in b/libcxx/test/configs/llvm-libc++-mingw.cfg.in new file mode 100644 index 000000000000..07f53c41886e --- /dev/null +++ b/libcxx/test/configs/llvm-libc++-mingw.cfg.in @@ -0,0 +1,32 @@ +# This testing configuration handles running the test suite against LLVM's libc++ +# using either a DLL or a static library, with MinGW/Clang on Windows. + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -isystem %{include} -isystem %{target-include} -I %{libcxx}/test/support' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib++ -L %{lib} -lc++' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +# LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the +# initial Windows failures until they can be properly diagnosed +# and fixed. 
This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. (Also see llvm.org/PR32730) +config.available_features.add('LIBCXX-WINDOWS-FIXME') + +import os, site +site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in new file mode 100644 index 000000000000..dcb2b07ae724 --- /dev/null +++ b/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in @@ -0,0 +1,32 @@ +# This testing configuration handles running the test suite against LLVM's libc++ +# using a DLL, with Clang-cl on Windows. + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '--driver-mode=g++')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -isystem %{include} -isystem %{target-include} -I %{libcxx}/test/support -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_STDIO_ISO_WIDE_SPECIFIERS -DNOMINMAX' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib -L %{lib} -lc++ -lmsvcrt -lmsvcprt -loldnames' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +# LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the +# initial Windows failures until they can be properly diagnosed +# and fixed. This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. (Also see llvm.org/PR32730) +config.available_features.add('LIBCXX-WINDOWS-FIXME') + +import os, site +site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in b/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in index d6fcaea2da89..246e84837a45 100644 --- a/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in +++ b/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in @@ -2,8 +2,6 @@ # using a shared library, with GCC. This is done differently from Clang because # GCC does not support the -nostdlib++ command-line flag. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', '')) @@ -14,7 +12,7 @@ config.substitutions.append(('%{link_flags}', '-L %{lib} -Wl,-rpath,%{lib} -nodefaultlibs -lc++ -lm -lgcc_s -lgcc -lpthread -lc -lgcc_s -lgcc -latomic' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T -- '.format(sys.executable) + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libcxx/test/configs/llvm-libc++-shared.cfg.in b/libcxx/test/configs/llvm-libc++-shared.cfg.in index 37e5c7b6709a..4d15c970bd29 100644 --- a/libcxx/test/configs/llvm-libc++-shared.cfg.in +++ b/libcxx/test/configs/llvm-libc++-shared.cfg.in @@ -1,8 +1,6 @@ # This testing configuration handles running the test suite against LLVM's libc++ # using a shared library. 
-import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{lib} -Wl,-rpath,%{lib} -lc++ -pthread' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T -- '.format(sys.executable) + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in new file mode 100644 index 000000000000..14812b2af9c1 --- /dev/null +++ b/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in @@ -0,0 +1,32 @@ +# This testing configuration handles running the test suite against LLVM's libc++ +# using a static library, with Clang-cl on Windows. + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '--driver-mode=g++')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -isystem %{include} -isystem %{target-include} -I %{libcxx}/test/support -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_STDIO_ISO_WIDE_SPECIFIERS -DNOMINMAX' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib -L %{lib} -llibc++ -lmsvcrt -lmsvcprt -loldnames' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +# LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the +# initial Windows failures until they can be properly diagnosed +# and fixed. This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. (Also see llvm.org/PR32730) +config.available_features.add('LIBCXX-WINDOWS-FIXME') + +import os, site +site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxx/test/configs/llvm-libc++-static.cfg.in b/libcxx/test/configs/llvm-libc++-static.cfg.in index 57229ddf9023..eaa711a82908 100644 --- a/libcxx/test/configs/llvm-libc++-static.cfg.in +++ b/libcxx/test/configs/llvm-libc++-static.cfg.in @@ -1,8 +1,6 @@ # This testing configuration handles running the test suite against LLVM's libc++ # using a static library. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{lib} -lc++ -lc++abi -pthread' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T -- '.format(sys.executable) + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp new file mode 100644 index 000000000000..064a034a9dd2 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/copy_options.h'}} +#include <__filesystem/copy_options.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp new file mode 100644 index 000000000000..6fef616cd53b --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/directory_entry.h'}} +#include <__filesystem/directory_entry.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp new file mode 100644 index 000000000000..6d01cd743d85 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/directory_iterator.h'}} +#include <__filesystem/directory_iterator.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp new file mode 100644 index 000000000000..2316023813f0 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/directory_options.h'}} +#include <__filesystem/directory_options.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp new file mode 100644 index 000000000000..9419bdb2d573 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/file_status.h'}} +#include <__filesystem/file_status.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp new file mode 100644 index 000000000000..ac940261c096 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/file_time_type.h'}} +#include <__filesystem/file_time_type.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp new file mode 100644 index 000000000000..70b5d9792c45 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/file_type.h'}} +#include <__filesystem/file_type.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp new file mode 100644 index 000000000000..095a71569ff5 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/filesystem_error.h'}} +#include <__filesystem/filesystem_error.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp new file mode 100644 index 000000000000..dc156cd5145b --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/operations.h'}} +#include <__filesystem/operations.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/utility/decay_copy.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp similarity index 89% rename from libcxx/test/libcxx/diagnostics/detail.headers/utility/decay_copy.module.verify.cpp rename to libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp index b15d23c71f2c..f232bf7b98f8 100644 --- a/libcxx/test/libcxx/diagnostics/detail.headers/utility/decay_copy.module.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp @@ -11,5 +11,5 @@ // WARNING: This test was generated by 'generate_private_header_tests.py' // and should not be edited manually. 
-// expected-error@*:* {{use of private header from outside its module: '__utility/decay_copy.h'}} -#include <__utility/decay_copy.h> +// expected-error@*:* {{use of private header from outside its module: '__filesystem/path.h'}} +#include <__filesystem/path.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp new file mode 100644 index 000000000000..71e1928d2508 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/path_iterator.h'}} +#include <__filesystem/path_iterator.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp new file mode 100644 index 000000000000..8a8c25edea98 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/perm_options.h'}} +#include <__filesystem/perm_options.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp new file mode 100644 index 000000000000..0df582305e8a --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/perms.h'}} +#include <__filesystem/perms.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp new file mode 100644 index 000000000000..73b7de8f8196 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/recursive_directory_iterator.h'}} +#include <__filesystem/recursive_directory_iterator.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp new file mode 100644 index 000000000000..d5f7586ca5d2 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/space_info.h'}} +#include <__filesystem/space_info.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp new file mode 100644 index 000000000000..5f54ab6908f2 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/u8path.h'}} +#include <__filesystem/u8path.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/utility/auto_cast.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/utility/auto_cast.module.verify.cpp new file mode 100644 index 000000000000..cf012c3d17e0 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/utility/auto_cast.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__utility/auto_cast.h'}} +#include <__utility/auto_cast.h> diff --git a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp index f93737c2372d..bbeaedd51f22 100644 --- a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp @@ -14,12 +14,13 @@ // ADDITIONAL_COMPILE_FLAGS: -I %S/../../../../src/filesystem -#include +#include #include -#include -#include #include -#include +#include +#include +#include +#include #include "filesystem_common.h" diff --git a/libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp b/libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp new file mode 100644 index 000000000000..0bf359900145 --- /dev/null +++ b/libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu1.o -DTU1 +// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu2.o -DTU2 +// RUN: %{cxx} %t.tu1.o %t.tu2.o %{flags} %{link_flags} -o %t.exe +// RUN: %{exec} %t.exe + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// Test the libc++-specific behavior that we handle the IFNDR case for ranges::begin +// by returning the beginning of the array-of-incomplete-type. +// Use two translation units so that `Incomplete` really is never completed +// at any point within TU2, but the array `bounded` is still given a definition +// (in TU1) to avoid an "undefined reference" error from the linker. +// All of the actually interesting stuff takes place within TU2. 
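A minimal standalone sketch of the two-translation-unit technique described in the comment above, using hypothetical names (Opaque, tu1.cpp, tu2.cpp) rather than the test's own; it is shown as one listing for brevity, but in practice the two halves live in separate files. It only illustrates how one TU defines the array while the other keeps the element type incomplete yet can still take the array's address:

// tu1.cpp (hypothetical): defines the element type and the array so the program links.
struct Opaque { int value; };
Opaque global_array[4];

// tu2.cpp (hypothetical): only a forward declaration is visible, so Opaque stays
// incomplete in that translation unit, but the array can be redeclared and used.
struct Opaque;
extern Opaque global_array[4];

Opaque* first_element() {
  // Array-to-pointer decay does not need sizeof(Opaque), so this is well-formed
  // even when Opaque is incomplete in the current translation unit.
  return global_array;
}

int main() { return first_element() == global_array ? 0 : 1; }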
+ +#include +#include + +#include "test_macros.h" + +#if defined(TU1) + +struct Incomplete {}; +Incomplete bounded[10]; +Incomplete unbounded[10]; + +#else // defined(TU1) + +struct Incomplete; + +constexpr bool test() +{ + { + extern Incomplete bounded[10]; + assert(std::ranges::begin(bounded) == bounded); + assert(std::ranges::cbegin(bounded) == bounded); + assert(std::ranges::begin(std::as_const(bounded)) == bounded); + assert(std::ranges::cbegin(std::as_const(bounded)) == bounded); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(bounded)), Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(bounded)), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(std::as_const(bounded))), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(std::as_const(bounded))), const Incomplete*); + } + { + extern Incomplete unbounded[]; + assert(std::ranges::begin(unbounded) == unbounded); + assert(std::ranges::cbegin(unbounded) == unbounded); + assert(std::ranges::begin(std::as_const(unbounded)) == unbounded); + assert(std::ranges::cbegin(std::as_const(unbounded)) == unbounded); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(unbounded)), Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(unbounded)), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(std::as_const(unbounded))), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(std::as_const(unbounded))), const Incomplete*); + } + + return true; +} + +int main(int, char**) +{ + test(); + static_assert(test()); +} + +#endif // defined(TU1) diff --git a/libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp b/libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp new file mode 100644 index 000000000000..4b83ca8945a3 --- /dev/null +++ b/libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// Test the libc++-specific behavior that we handle the IFNDR case for ranges::end +// by being SFINAE-friendly. + +#include +#include +#include + +struct Incomplete; + +constexpr bool test() +{ + { + extern Incomplete bounded[10]; + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + } + { + extern Incomplete unbounded[]; + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + } + + return true; +} + +int main(int, char**) +{ + test(); + static_assert(test()); +} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp deleted file mode 100644 index 41322529792e..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::begin on an array of incomplete type. - -#include - -#include - -using begin_t = decltype(std::ranges::begin); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::begin` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp deleted file mode 100644 index bd0ff5e85dd2..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::cbegin on an array of incomplete type. - -#include - -#include - -using cbegin_t = decltype(std::ranges::cbegin); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::end` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp deleted file mode 100644 index b4d6729c9547..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::cend on an array of incomplete type. 
- -#include - -#include - -using cend_t = decltype(std::ranges::cend); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::end` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp deleted file mode 100644 index ecfe1c4e5a5a..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::end on an array of incomplete type. - -#include - -#include - -using end_t = decltype(std::ranges::end); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::end` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp deleted file mode 100644 index f67328084c6b..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::data on an array of incomplete type. - -#include - -struct Incomplete; - -void f(Incomplete arr[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f2(Incomplete arr[2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&arr)[2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&&arr)[2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&arr)[2][2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp deleted file mode 100644 index 3b5f79a5bdf5..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp +++ /dev/null @@ -1,53 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::empty on an array of incomplete type. 
- -#include - -struct Incomplete; - -void f(Incomplete arr[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{call to deleted function call operator in type}} - // expected-error@*:* {{attempt to use a deleted function}} - std::ranges::begin(arr); -} - -void f(Incomplete(&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - std::ranges::begin(arr); -} - -void f(Incomplete(&&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - std::ranges::begin(arr); -} - -void f2(Incomplete arr[2]) { - // expected-error@*:* {{call to deleted function call operator in type}} - // expected-error@*:* {{attempt to use a deleted function}} - std::ranges::begin(arr); -} - -void f(Incomplete(&arr)[2]) { - std::ranges::begin(arr); -} - -void f(Incomplete(&&arr)[2]) { - std::ranges::begin(arr); -} - -void f(Incomplete(&arr)[2][2]) { - std::ranges::begin(arr); -} diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp new file mode 100644 index 000000000000..4e17a61821dc --- /dev/null +++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-format + +// + +// namespace __format { enum class __arg_t : uint8_t{...}; } + +#include + +#include + +#include "test_macros.h" + +static_assert(std::is_same_v, uint8_t>); + +static_assert(uint8_t(std::__format::__arg_t::__none) == 0); +static_assert(uint8_t(std::__format::__arg_t::__boolean) == 1); +static_assert(uint8_t(std::__format::__arg_t::__char_type) == 2); +static_assert(uint8_t(std::__format::__arg_t::__int) == 3); +static_assert(uint8_t(std::__format::__arg_t::__long_long) == 4); +static_assert(uint8_t(std::__format::__arg_t::__i128) == 5); +static_assert(uint8_t(std::__format::__arg_t::__unsigned) == 6); +static_assert(uint8_t(std::__format::__arg_t::__unsigned_long_long) == 7); +static_assert(uint8_t(std::__format::__arg_t::__u128) == 8); +static_assert(uint8_t(std::__format::__arg_t::__float) == 9); +static_assert(uint8_t(std::__format::__arg_t::__double) == 10); +static_assert(uint8_t(std::__format::__arg_t::__long_double) == 11); +static_assert(uint8_t(std::__format::__arg_t::__const_char_type_ptr) == 12); +static_assert(uint8_t(std::__format::__arg_t::__string_view) == 13); +static_assert(uint8_t(std::__format::__arg_t::__ptr) == 14); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp index e71d4e24664a..f7e5770a7cd7 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp @@ -19,10 +19,10 @@ // bool operator> 
(directory_entry const&) const noexcept; // bool operator>=(directory_entry const&) const noexcept; - #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp index 97b5d23d9dbb..403d1ae338dd 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp @@ -19,8 +19,9 @@ // typedef ... iterator_category #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp index 3c0ae5d0886c..8fd23b60081b 100644 --- a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp @@ -21,8 +21,10 @@ // const path& path2() const noexcept; #include "filesystem_include.h" -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp index a7182557d3f7..9044b2f5a45e 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp @@ -15,8 +15,9 @@ // path& operator=(path const&); #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp index 69dff085e58f..0f3e4745fd53 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp @@ -17,8 +17,9 @@ // path& operator=(path&&) noexcept #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp index 31d00ffd2cd3..1155b7c1b3b5 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp @@ -15,8 +15,9 @@ // path(path const&) #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp index 40a8c54f802d..9f575ba61329 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp @@ -16,8 
+16,9 @@ // path(path&&) noexcept #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp index e9e0a990165a..ab18399fd5d6 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp @@ -17,8 +17,9 @@ // static constexpr value_type preferred_separator = ...; #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp index 36380f64a9fe..d86d4f1590c0 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp @@ -10,8 +10,6 @@ // istream cerr; -// XFAIL: LIBCXX-WINDOWS-FIXME - // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh // RUN: %{build} diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp index b740300e8d32..f1f52d971eec 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp @@ -10,8 +10,6 @@ // istream clog; -// XFAIL: LIBCXX-WINDOWS-FIXME - // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh // RUN: %{build} diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp index c606312fa054..e5efa3070e88 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp @@ -10,8 +10,6 @@ // istream cout; -// XFAIL: LIBCXX-WINDOWS-FIXME - // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stdout.sh // RUN: %{build} diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp index 617b2d45c066..65c9cd52f58f 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp @@ -11,7 +11,6 @@ // istream wcerr; // XFAIL: libcpp-has-no-wide-characters -// XFAIL: LIBCXX-WINDOWS-FIXME // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp index e0ce4646ea9d..8ce29180b0fd 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp @@ -11,7 +11,6 @@ // istream wclog; // XFAIL: libcpp-has-no-wide-characters -// XFAIL: LIBCXX-WINDOWS-FIXME // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp 
b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp index 6089432b0f56..a58feb400735 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp @@ -11,7 +11,6 @@ // istream wcout; // XFAIL: libcpp-has-no-wide-characters -// XFAIL: LIBCXX-WINDOWS-FIXME // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stdout.sh diff --git a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp index d0d06c477c98..b906ad928536 100644 --- a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp @@ -15,8 +15,11 @@ // unavailable until macOS 10.15 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} -// MinGW lacks timespec_get. -// XFAIL: target={{.+}}-windows-gnu +// MinGW used to lack timespec_get, but has got it in newer versions. +// Mark the test as unsupported for a transition period, to avoid spurious +// failures when upgrading the precommit testing environment. After +// the testing environment is upgraded, we could remove the marking altogether. +// UNSUPPORTED: target={{.+}}-windows-gnu #include #include diff --git a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp b/libcxx/test/std/ranges/range.access/begin.pass.cpp similarity index 89% rename from libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp rename to libcxx/test/std/ranges/range.access/begin.pass.cpp index 3b712a1cc137..95d2196803b2 100644 --- a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.access/begin.pass.cpp @@ -23,13 +23,17 @@ using RangeCBeginT = decltype(std::ranges::cbegin)&; static int globalBuff[8]; -struct Incomplete; - static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + struct BeginMember { int x; constexpr const int *begin() const { return &x; } @@ -245,28 +249,27 @@ constexpr bool testBeginFunction() { ASSERT_NOEXCEPT(std::ranges::begin(std::declval())); ASSERT_NOEXCEPT(std::ranges::cbegin(std::declval())); -template struct NoThrowMemberBegin { - T begin() const noexcept; -}; -ASSERT_NOEXCEPT(std::ranges::begin(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cbegin(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::begin(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cbegin(std::declval>&>())); + ThrowingIterator begin() const noexcept; // auto(t.begin()) doesn't throw +} ntmb; +static_assert(noexcept(std::ranges::begin(ntmb))); +static_assert(noexcept(std::ranges::cbegin(ntmb))); -template struct NoThrowADLBegin { - friend T begin(NoThrowADLBegin&) noexcept { return T{}; } - friend T begin(NoThrowADLBegin const&) noexcept { return T{}; } -}; -ASSERT_NOEXCEPT(std::ranges::begin(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cbegin(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::begin(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cbegin(std::declval>&>())); + friend ThrowingIterator begin(NoThrowADLBegin&) noexcept; // 
auto(begin(t)) doesn't throw + friend ThrowingIterator begin(const NoThrowADLBegin&) noexcept; +} ntab; +static_assert(noexcept(std::ranges::begin(ntab))); +static_assert(noexcept(std::ranges::cbegin(ntab))); + +struct NoThrowMemberBeginReturnsRef { + ThrowingIterator& begin() const noexcept; // auto(t.begin()) may throw +} ntmbrr; +static_assert(!noexcept(std::ranges::begin(ntmbrr))); +static_assert(!noexcept(std::ranges::cbegin(ntmbrr))); struct BeginReturnsArrayRef { auto begin() const noexcept -> int(&)[10]; - auto end() const noexcept -> int(&)[10]; } brar; static_assert(noexcept(std::ranges::begin(brar))); static_assert(noexcept(std::ranges::cbegin(brar))); diff --git a/libcxx/test/std/ranges/range.access/range.prim/data.pass.cpp b/libcxx/test/std/ranges/range.access/data.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.prim/data.pass.cpp rename to libcxx/test/std/ranges/range.access/data.pass.cpp diff --git a/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp b/libcxx/test/std/ranges/range.access/empty.pass.cpp similarity index 76% rename from libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp rename to libcxx/test/std/ranges/range.access/empty.pass.cpp index 5a06fa88348a..18cdce02b573 100644 --- a/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp +++ b/libcxx/test/std/ranges/range.access/empty.pass.cpp @@ -15,11 +15,11 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" using RangeEmptyT = decltype(std::ranges::empty); -using RangeSizeT = decltype(std::ranges::size); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -30,12 +30,27 @@ static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); -struct NonConstSizeAndEmpty { - int size(); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + +extern Incomplete array_of_incomplete[42]; +static_assert(!std::ranges::empty(array_of_incomplete)); +static_assert(!std::ranges::empty(std::move(array_of_incomplete))); +static_assert(!std::ranges::empty(std::as_const(array_of_incomplete))); +static_assert(!std::ranges::empty(static_cast(array_of_incomplete))); + +struct InputRangeWithoutSize { + cpp17_input_iterator begin() const; + cpp17_input_iterator end() const; +}; +static_assert(!std::is_invocable_v); + +struct NonConstEmpty { bool empty(); }; -static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); struct HasMemberAndFunction { constexpr bool empty() const { return true; } @@ -49,7 +64,7 @@ struct BadReturnType { static_assert(!std::is_invocable_v); struct BoolConvertible { - constexpr /*TODO: explicit*/ operator bool() noexcept(false) { return true; } + constexpr explicit operator bool() noexcept(false) { return true; } }; struct BoolConvertibleReturnType { constexpr BoolConvertible empty() noexcept { return {}; } @@ -65,10 +80,10 @@ static_assert(!std::is_invocable_v); constexpr bool testEmptyMember() { HasMemberAndFunction a; - assert(std::ranges::empty(a) == true); + assert(std::ranges::empty(a)); BoolConvertibleReturnType b; - assert(std::ranges::empty(b) == true); + assert(std::ranges::empty(b)); return true; } @@ -92,17 +107,17 @@ static_assert(std::ranges::sized_range); constexpr bool testUsingRangesSize() { SizeMember a{1}; - assert(std::ranges::empty(a) == false); + assert(!std::ranges::empty(a)); SizeMember b{0}; 
- assert(std::ranges::empty(b) == true); + assert(std::ranges::empty(b)); SizeFunction c{1}; - assert(std::ranges::empty(c) == false); + assert(!std::ranges::empty(c)); SizeFunction d{0}; - assert(std::ranges::empty(d) == true); + assert(std::ranges::empty(d)); BeginEndSizedSentinel e; - assert(std::ranges::empty(e) == true); + assert(std::ranges::empty(e)); return true; } @@ -139,18 +154,16 @@ struct EvilBeginEnd { constexpr bool testBeginEqualsEnd() { BeginEndNotSizedSentinel a; - assert(std::ranges::empty(a) == true); + assert(std::ranges::empty(a)); DisabledSizeRangeWithBeginEnd d; - assert(std::ranges::empty(d) == true); + assert(std::ranges::empty(d)); BeginEndAndEmpty e; - assert(std::ranges::empty(e) == false); // e.empty() - assert(std::ranges::empty(std::as_const(e)) == true); // e.begin() == e.end() + assert(!std::ranges::empty(e)); // e.empty() + assert(std::ranges::empty(std::as_const(e))); // e.begin() == e.end() -#if 0 // TODO FIXME assert(std::ranges::empty(EvilBeginEnd())); -#endif return true; } diff --git a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp b/libcxx/test/std/ranges/range.access/end.pass.cpp similarity index 88% rename from libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp rename to libcxx/test/std/ranges/range.access/end.pass.cpp index dd5da2ebf594..84b4904d8f96 100644 --- a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp +++ b/libcxx/test/std/ranges/range.access/end.pass.cpp @@ -28,6 +28,12 @@ static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + struct EndMember { int x; constexpr const int *begin() const { return nullptr; } @@ -277,34 +283,34 @@ constexpr bool testEndFunction() { ASSERT_NOEXCEPT(std::ranges::end(std::declval())); ASSERT_NOEXCEPT(std::ranges::cend(std::declval())); -template struct NoThrowMemberEnd { - T begin() const; - T end() const noexcept; -}; -ASSERT_NOEXCEPT(std::ranges::end(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cend(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::end(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cend(std::declval>&>())); + ThrowingIterator begin() const; + ThrowingIterator end() const noexcept; // auto(t.end()) doesn't throw +} ntme; +static_assert(noexcept(std::ranges::end(ntme))); +static_assert(noexcept(std::ranges::cend(ntme))); -template struct NoThrowADLEnd { - T begin() const; - friend T end(NoThrowADLEnd&) noexcept { return T{}; } - friend T end(NoThrowADLEnd const&) noexcept { return T{}; } -}; -ASSERT_NOEXCEPT(std::ranges::end(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cend(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::end(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cend(std::declval>&>())); - -struct BeginReturnsArrayRef { + ThrowingIterator begin() const; + friend ThrowingIterator end(NoThrowADLEnd&) noexcept; // auto(end(t)) doesn't throw + friend ThrowingIterator end(const NoThrowADLEnd&) noexcept; +} ntae; +static_assert(noexcept(std::ranges::end(ntae))); +static_assert(noexcept(std::ranges::cend(ntae))); + +struct NoThrowMemberEndReturnsRef { + ThrowingIterator begin() const; + ThrowingIterator& end() const noexcept; // auto(t.end()) may throw +} ntmerr; +static_assert(!noexcept(std::ranges::end(ntmerr))); +static_assert(!noexcept(std::ranges::cend(ntmerr))); + +struct 
EndReturnsArrayRef { auto begin() const noexcept -> int(&)[10]; auto end() const noexcept -> int(&)[10]; -} brar; -static_assert(noexcept(std::ranges::end(brar))); -static_assert(noexcept(std::ranges::cend(brar))); - +} erar; +static_assert(noexcept(std::ranges::end(erar))); +static_assert(noexcept(std::ranges::cend(erar))); int main(int, char**) { testArray(); diff --git a/libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp deleted file mode 100644 index 92e92f232f7c..000000000000 --- a/libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp +++ /dev/null @@ -1 +0,0 @@ -// Tested in begin.pass.cpp. diff --git a/libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp deleted file mode 100644 index 924fea790e63..000000000000 --- a/libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp +++ /dev/null @@ -1 +0,0 @@ -// Tested in end.pass.cpp. diff --git a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp similarity index 93% rename from libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp rename to libcxx/test/std/ranges/range.access/size.pass.cpp index 17449744cd51..4d91e3dafebd 100644 --- a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/size.pass.cpp @@ -25,6 +25,17 @@ static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + +extern Incomplete array_of_incomplete[42]; +static_assert(std::ranges::size(array_of_incomplete) == 42); +static_assert(std::ranges::size(std::move(array_of_incomplete)) == 42); +static_assert(std::ranges::size(std::as_const(array_of_incomplete)) == 42); +static_assert(std::ranges::size(static_cast(array_of_incomplete)) == 42); + static_assert(std::semiregular>); struct SizeMember { @@ -124,7 +135,7 @@ struct SizeFunctionSigned { bool constexpr testHasSizeFunction() { assert(std::ranges::size(SizeFunction()) == 42); ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeFunction())), size_t); - assert(std::ranges::size(MoveOnlySizeFunction()) == 42); + static_assert(!std::is_invocable_v); assert(std::ranges::size(EnumSizeFunction()) == 42); assert(std::ranges::size(SizeFunctionConst()) == 42); diff --git a/libcxx/test/std/ranges/range.access/range.prim/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.prim/ssize.pass.cpp rename to libcxx/test/std/ranges/range.access/ssize.pass.cpp diff --git a/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp index 9fc24a66ee02..f210c619ca68 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp @@ -69,13 +69,8 @@ int main(int, char**) { using CommonIter = std::common_iterator>; std::same_as auto begin = common.begin(); assert(begin == std::ranges::begin(view)); - } - { - SizedForwardView view{buf, buf + 8}; - std::ranges::common_view const common(view); - using CommonIter = 
std::common_iterator>; - std::same_as auto begin = common.begin(); - assert(begin == std::ranges::begin(view)); + std::same_as auto cbegin = std::as_const(common).begin(); + assert(cbegin == std::ranges::begin(view)); } { diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp index 1562a188a802..8da2ec613e97 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp @@ -24,20 +24,13 @@ constexpr bool test() { int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8}; - auto sw = sentinel_wrapper(buffer + 8); // Note: not 4, but that's OK. - { - const std::ranges::take_view tv(MoveOnlyView{buffer}, 4); - assert(base(tv.end().base()) == base(sw)); - ASSERT_SAME_TYPE(decltype(tv.end().base()), sentinel_wrapper); + std::ranges::take_view tv(MoveOnlyView(buffer), 4); + std::same_as> auto sw1 = tv.end().base(); + assert(base(sw1) == buffer + 8); + std::same_as> auto sw2 = std::as_const(tv).end().base(); + assert(base(sw2) == buffer + 8); } - - { - std::ranges::take_view tv(MoveOnlyView{buffer}, 4); - assert(base(tv.end().base()) == base(sw)); - ASSERT_SAME_TYPE(decltype(tv.end().base()), sentinel_wrapper); - } - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp index 92a6c1770510..2b7b15d1092f 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp @@ -27,27 +27,39 @@ constexpr bool test() { { // Test the default ctor. - std::ranges::take_view tv(MoveOnlyView{buffer}, 4); - assert(decltype(tv.end()){} == std::ranges::next(tv.begin(), 4)); + using TakeView = std::ranges::take_view; + using Sentinel = std::ranges::sentinel_t; + Sentinel s; + TakeView tv = TakeView(MoveOnlyView(buffer), 4); + assert(tv.begin() + 4 == s); } { - std::ranges::take_view nonConst(MoveOnlyView{buffer}, 5); - const std::ranges::take_view tvConst(MoveOnlyView{buffer}, 5); - auto sent1 = nonConst.end(); - // Convert to const. Note, we cannot go the other way. - std::remove_cv_t sent2 = sent1; - - assert(sent1 == std::ranges::next(tvConst.begin(), 5)); - assert(sent2 == std::ranges::next(tvConst.begin(), 5)); + // Test the conversion from "sentinel" to "sentinel-to-const". + using TakeView = std::ranges::take_view; + using Sentinel = std::ranges::sentinel_t; + using ConstSentinel = std::ranges::sentinel_t; + static_assert(std::is_convertible_v); + TakeView tv = TakeView(MoveOnlyView(buffer), 4); + Sentinel s = tv.end(); + ConstSentinel cs = s; + cs = s; // test assignment also + assert(tv.begin() + 4 == s); + assert(tv.begin() + 4 == cs); + assert(std::as_const(tv).begin() + 4 == s); + assert(std::as_const(tv).begin() + 4 == cs); } { - std::ranges::take_view tv(CopyableView{buffer}, 6); - auto sw = sentinel_wrapper(buffer + 6); - using Sent = decltype(tv.end()); - Sent sent = Sent(sw); - assert(base(sent.base()) == base(sw)); + // Test the constructor from "base-sentinel" to "sentinel". 
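[Aside, not part of the patch: a reduced sketch of what the take_view sentinel tests above exercise. std::views::iota(0) stands in for a base range that is neither sized nor common, so take_view::end() returns a sentinel wrapping the base range's sentinel rather than an iterator; C++20:]

#include <cassert>
#include <ranges>

int main() {
  std::ranges::take_view tv(std::views::iota(0), 4);  // base is neither sized nor common
  auto it = tv.begin();                               // a counted_iterator over iota's iterator
  auto s  = tv.end();                                 // take_view::sentinel wrapping iota's sentinel
  assert(it + 4 == s);                                // equal once the count is exhausted
  // ctor.pass.cpp additionally checks that the non-const sentinel converts to
  // the sentinel of the const view when the underlying sentinels allow it.
}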
+ using TakeView = std::ranges::take_view; + using Sentinel = std::ranges::sentinel_t; + sentinel_wrapper sw1 = MoveOnlyView(buffer).end(); + static_assert( std::is_constructible_v>); + static_assert(!std::is_convertible_v, Sentinel>); + auto s = Sentinel(sw1); + std::same_as> auto sw2 = s.base(); + assert(base(sw2) == base(sw1)); } return true; diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp index c933e44f1a78..80b8c92bf251 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp @@ -25,44 +25,58 @@ #include "types.h" template -concept EndInvocable = requires(T t) { t.end(); }; - -template -concept EndIsIter = requires(T t) { ++t.end(); }; +concept HasConstQualifiedEnd = requires(const T& t) { t.end(); }; constexpr bool test() { { - std::ranges::transform_view transformView(MoveOnlyView{}, PlusOneMutable{}); - assert(transformView.end().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(base(end.base()) == globalBuff + 8); + static_assert(!HasConstQualifiedEnd); } - { - std::ranges::transform_view transformView(ForwardView{}, PlusOneMutable{}); - assert(transformView.end().base().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(!std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(base(base(end.base())) == globalBuff + 8); + static_assert(!HasConstQualifiedEnd); } - { - std::ranges::transform_view transformView(InputView{}, PlusOneMutable{}); - assert(transformView.end().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(!std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(base(base(end.base())) == globalBuff + 8); + auto cend = std::as_const(tv).end(); + ASSERT_SAME_TYPE(decltype(cend.base()), std::ranges::sentinel_t); + assert(base(base(cend.base())) == globalBuff + 8); } - { - const std::ranges::transform_view transformView(MoveOnlyView{}, PlusOne{}); - assert(transformView.end().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(end.base() == globalBuff + 8); + static_assert(!HasConstQualifiedEnd); + } + { + using TransformView = std::ranges::transform_view; + static_assert(std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(end.base() == globalBuff + 8); + auto cend = std::as_const(tv).end(); + ASSERT_SAME_TYPE(decltype(cend.base()), std::ranges::sentinel_t); + assert(cend.base() == globalBuff + 8); } - - static_assert(!EndInvocable>); - static_assert( EndInvocable< std::ranges::transform_view>); - static_assert( EndInvocable>); - static_assert(!EndInvocable>); - static_assert( EndInvocable< std::ranges::transform_view>); - static_assert( EndInvocable>); - - static_assert(!EndIsIter>); - static_assert(!EndIsIter< std::ranges::transform_view>); - 
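[Aside, not part of the patch: the rewritten end.pass.cpp cases above hinge on transform_view being a common range exactly when its base is, so end() is either an iterator or a sentinel. A small standalone C++20 sketch:]

#include <cassert>
#include <ranges>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3};
  auto square = [](int x) { return x * x; };

  auto bounded = std::views::transform(v, square);     // vector is a common range
  static_assert(std::ranges::common_range<decltype(bounded)>);
  assert(*(bounded.end() - 1) == 9);                   // end() is a real iterator

  auto unbounded = std::views::iota(0) | std::views::transform(square);
  static_assert(!std::ranges::common_range<decltype(unbounded)>); // end() is a sentinel
}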
static_assert( EndIsIter>); - static_assert( EndIsIter< std::ranges::transform_view>); - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp index a69739647d6d..26fc953023c8 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp @@ -17,30 +17,32 @@ #include "test_macros.h" #include "../types.h" -template -concept BaseInvocable = requires(std::ranges::iterator_t> iter) { - iter.base(); +template +concept HasBase = requires(It it) { + static_cast(it).base(); }; constexpr bool test() { { - std::ranges::transform_view transformView; - auto iter = std::move(transformView).begin(); - ASSERT_SAME_TYPE(int*, decltype(iter.base())); - assert(iter.base() == globalBuff); - ASSERT_SAME_TYPE(int*, decltype(std::move(iter).base())); - assert(std::move(iter).base() == globalBuff); + using TransformView = std::ranges::transform_view; + TransformView tv; + auto begin = tv.begin(); + ASSERT_SAME_TYPE(decltype(begin.base()), int*); + assert(begin.base() == globalBuff); + ASSERT_SAME_TYPE(decltype(std::move(begin).base()), int*); + assert(std::move(begin).base() == globalBuff); } - { - std::ranges::transform_view transformView; - auto iter = transformView.begin(); - assert(std::move(iter).base() == globalBuff); - ASSERT_SAME_TYPE(cpp20_input_iterator, decltype(std::move(iter).base())); + using TransformView = std::ranges::transform_view; + TransformView tv; + auto begin = tv.begin(); + static_assert(!HasBase); + static_assert(HasBase); + static_assert(!HasBase); + static_assert(!HasBase); + std::same_as> auto it = std::move(begin).base(); + assert(base(it) == globalBuff); } - - static_assert(!BaseInvocable); - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h index 3c1ed0f564a8..80ffa6a0a67d 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h @@ -85,12 +85,11 @@ static_assert( std::ranges::borrowed_range); struct InputView : std::ranges::view_base { int *ptr_; constexpr explicit InputView(int* ptr = globalBuff) : ptr_(ptr) {} - constexpr cpp20_input_iterator begin() const { return cpp20_input_iterator(ptr_); } - constexpr int *end() const { return ptr_ + 8; } + constexpr auto begin() const { return cpp20_input_iterator(ptr_); } + constexpr auto end() const { return sentinel_wrapper>(cpp20_input_iterator(ptr_ + 8)); } }; -// TODO: remove these bogus operators -constexpr bool operator==(const cpp20_input_iterator &lhs, int* rhs) { return lhs.base() == rhs; } -constexpr bool operator==(int* lhs, const cpp20_input_iterator &rhs) { return rhs.base() == lhs; } +static_assert( std::ranges::view); +static_assert(!std::ranges::sized_range); struct SizedSentinelView : std::ranges::view_base { int count_; diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp index c0468fd8ca22..c3d5189e1bba 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp @@ -21,7 +21,7 @@ // (It's fixed in the upcoming Clang 14, 
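[Aside, not part of the patch: the types.h change above drops the mixed-type operator== "bogus operators" in favour of a proper sentinel type for the input iterator. A hand-rolled equivalent showing what sentinel_for requires, hypothetical names, C++20:]

#include <cassert>
#include <cstddef>
#include <iterator>

struct EndSentinel { const int *limit; };

struct CountingIter {
  using value_type = int;
  using difference_type = std::ptrdiff_t;
  const int *p;
  int operator*() const { return *p; }
  CountingIter &operator++() { ++p; return *this; }
  void operator++(int) { ++p; }
  // A single hidden friend; the reversed and != forms are synthesized in C++20.
  friend bool operator==(const CountingIter &it, EndSentinel s) { return it.p == s.limit; }
};
static_assert(std::input_iterator<CountingIter>);
static_assert(std::sentinel_for<EndSentinel, CountingIter>);

int main() {
  int a[3] = {1, 2, 3};
  int sum = 0;
  for (CountingIter it{a}; it != EndSentinel{a + 3}; ++it)
    sum += *it;
  assert(sum == 6);
}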
by https://reviews.llvm.org/D109651.) // Prior to the fix, when statically linked, the unwind info for the two // (default and overridden) operator new implementations clash. -// XFAIL: target={{.+}}-windows-gnu && !windows-dll && clang-13 +// UNSUPPORTED: target={{.+}}-windows-gnu && !windows-dll && clang-13 #include #include diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h index 00f4701e881a..44a01645dc49 100644 --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -14,9 +14,10 @@ #endif #include +#include #include // for printf #include -#include +#include #include #include "make_string.h" diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 5c5b1f964859..f27979dcb578 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -108,8 +108,7 @@ function generate-cmake-libcxx-win() { -DCMAKE_C_COMPILER=clang-cl \ -DCMAKE_CXX_COMPILER=clang-cl \ -DLIBCXX_ENABLE_FILESYSTEM=YES \ - -DCMAKE_CXX_FLAGS="-D_LIBCPP_HAS_NO_INT128" \ - -DLIBCXX_TEST_COMPILER_FLAGS="-D_LIBCPP_HAS_NO_INT128" \ + -DLIBCXX_EXTRA_SITE_DEFINES="_LIBCPP_HAS_NO_INT128" \ "${@}" } @@ -598,13 +597,15 @@ clang-cl-dll) # correctly when libc++ visibility attributes indicate dllimport linkage # anyway), thus just disable the experimental library. Remove this # setting when cmake and the test driver does the right thing automatically. - generate-cmake-libcxx-win -DLIBCXX_ENABLE_EXPERIMENTAL_LIBRARY=OFF + generate-cmake-libcxx-win -DLIBCXX_ENABLE_EXPERIMENTAL_LIBRARY=OFF \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-shared-clangcl.cfg.in" echo "+++ Running the libc++ tests" ${NINJA} -vC "${BUILD_DIR}" check-cxx ;; clang-cl-static) clean - generate-cmake-libcxx-win -DLIBCXX_ENABLE_SHARED=OFF + generate-cmake-libcxx-win -DLIBCXX_ENABLE_SHARED=OFF \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-static-clangcl.cfg.in" echo "+++ Running the libc++ tests" ${NINJA} -vC "${BUILD_DIR}" check-cxx ;; @@ -618,6 +619,7 @@ mingw-dll) generate-cmake \ -DCMAKE_C_COMPILER=x86_64-w64-mingw32-clang \ -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-clang++ \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-mingw.cfg.in" \ -C "${MONOREPO_ROOT}/libcxx/cmake/caches/MinGW.cmake" echo "+++ Running the libc++ tests" ${NINJA} -vC "${BUILD_DIR}" check-cxx @@ -627,6 +629,7 @@ mingw-static) generate-cmake \ -DCMAKE_C_COMPILER=x86_64-w64-mingw32-clang \ -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-clang++ \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-mingw.cfg.in" \ -C "${MONOREPO_ROOT}/libcxx/cmake/caches/MinGW.cmake" \ -DLIBCXX_ENABLE_SHARED=OFF \ -DLIBUNWIND_ENABLE_SHARED=OFF diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index a28590ef0bc1..d61743e49bff 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -160,14 +160,6 @@ def print_config_info(self): self.lit_config.note("Linking against the ABI Library at {}".format(self.abi_library_root)) self.lit_config.note("Running against the ABI Library at {}".format(self.abi_runtime_root)) - def get_test_format(self): - from libcxx.test.format import LibcxxTestFormat - return LibcxxTestFormat( - self.cxx, - self.use_clang_verify, - self.executor, - exec_env=self.exec_env) - def configure_cxx(self): # Gather various compiler parameters. 
cxx = self.get_lit_conf('cxx_under_test') diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index e4755fec2316..429e2d82c97b 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -75,8 +75,14 @@ # Whether Bash can run on the executor. # This is not always the case, for example when running on embedded systems. + # + # For the corner case of bash existing, but it being missing in the path + # set in %{exec} as "--env PATH=one-single-dir", the executor does find + # and executes bash, but bash then can't find any other common shell + # utilities. Test executing "bash -c 'bash --version'" to see if bash + # manages to find binaries to execute. Feature(name='executor-has-no-bash', - when=lambda cfg: runScriptExitCode(cfg, ['%{exec} bash --version']) != 0), + when=lambda cfg: runScriptExitCode(cfg, ['%{exec} bash -c \'bash --version\'']) != 0), Feature(name='apple-clang', when=_isAppleClang), Feature(name=lambda cfg: 'apple-clang-{__clang_major__}'.format(**compilerMacros(cfg)), when=_isAppleClang), diff --git a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in index 4165d7a9c249..4d0df23e0d67 100644 --- a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in +++ b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in @@ -1,7 +1,5 @@ # Testing configuration for Apple's system libc++abi. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++ -lc++abi' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env DYLD_LIBRARY_PATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env DYLD_LIBRARY_PATH=%{install}/lib -- ' )) import os, site diff --git a/libcxxabi/test/configs/cmake-bridge.cfg.in b/libcxxabi/test/configs/cmake-bridge.cfg.in index 469872c7060b..f7ede5f16acf 100644 --- a/libcxxabi/test/configs/cmake-bridge.cfg.in +++ b/libcxxabi/test/configs/cmake-bridge.cfg.in @@ -29,3 +29,4 @@ config.host_triple = '@LLVM_HOST_TRIPLE@' config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) config.substitutions.append(('%{libcxx}', '@LIBCXXABI_LIBCXX_PATH@')) config.substitutions.append(('%{install}', '@CMAKE_BINARY_DIR@')) +config.substitutions.append(('%{executor}', '@LIBCXXABI_EXECUTOR@')) diff --git a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in index feb5e7b3e9dc..06d9aca508a4 100644 --- a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in +++ b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in @@ -1,7 +1,5 @@ # Testing configuration for libc++abi on AIX. 
-import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}','')) @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++ -lc++abi -Wl,-bbigtoc' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env LIBPATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env LIBPATH=%{install}/lib -- ' )) import os, site diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index f1b0c5c0707d..07b60673577e 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2085,7 +2085,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) - if (dyn_cast_or_null(ctx.symtab.find(arg->getValue()))) + if (isa_and_nonnull(ctx.symtab.find(arg->getValue()))) addUndefined(arg->getValue()); while (run()); } diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 3a7d3f477e02..e28b62329494 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -77,7 +77,7 @@ void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. uint32_t glink = in.plt->getVA(); // VA of .glink if (!config->isPic) { - for (const Symbol *sym : cast(in.plt)->canonical_plts) { + for (const Symbol *sym : cast(*in.plt).canonical_plts) { writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); buf += 16; glink += 16; diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index aa00d6eadbf9..5b07f0e18c8a 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -114,8 +114,8 @@ CallGraphSort::CallGraphSort() { // Create the graph. for (std::pair &c : profile) { - const auto *fromSB = cast(c.first.first->repl); - const auto *toSB = cast(c.first.second->repl); + const auto *fromSB = cast(c.first.first); + const auto *toSB = cast(c.first.second); uint64_t weight = c.second; // Ignore edges between input sections belonging to different output diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 0c8ce62c566a..b3d5219ff57b 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -22,6 +22,7 @@ #include "llvm/Support/GlobPattern.h" #include "llvm/Support/PrettyStackTrace.h" #include +#include #include namespace lld { @@ -343,7 +344,7 @@ struct Configuration { }; // The only instance of Configuration struct. -extern Configuration *config; +extern std::unique_ptr config; // The first two elements of versionDefinitions represent VER_NDX_LOCAL and // VER_NDX_GLOBAL. This helper returns other elements. diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 4d84c09a0185..789820ba7a8e 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -27,8 +27,7 @@ using namespace lld::elf; template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { // Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below. 
- ArrayRef objSections = - CHECK(obj->getObj().sections(), obj); + ArrayRef objSections = obj->template getELFShdrs(); assert(objSections.size() == obj->getSections().size()); for (auto it : llvm::enumerate(obj->getSections())) { InputSectionBase *sec = it.value(); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index d0007449a2b1..6b689f50cce7 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -71,8 +71,8 @@ using namespace llvm::support; using namespace lld; using namespace lld::elf; -Configuration *elf::config; -LinkerDriver *elf::driver; +std::unique_ptr elf::config; +std::unique_ptr elf::driver; static void setConfigs(opt::InputArgList &args); static void readConfigs(opt::InputArgList &args); @@ -90,7 +90,7 @@ bool elf::link(ArrayRef args, bool canExitEarly, archiveFiles.clear(); binaryFiles.clear(); bitcodeFiles.clear(); - lazyObjFiles.clear(); + lazyBitcodeFiles.clear(); objectFiles.clear(); sharedFiles.clear(); backwardReferences.clear(); @@ -111,10 +111,10 @@ bool elf::link(ArrayRef args, bool canExitEarly, errorHandler().exitEarly = canExitEarly; stderrOS.enable_colors(stderrOS.has_colors()); - config = make(); - driver = make(); - script = make(); - symtab = make(); + config = std::make_unique(); + driver = std::make_unique(); + script = std::make_unique(); + symtab = std::make_unique(); partitions = {Partition()}; @@ -248,7 +248,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { for (const std::pair &p : getArchiveMembers(mbref)) - files.push_back(make(p.first, path, p.second)); + files.push_back(createLazyFile(p.first, path, p.second)); return; } @@ -273,7 +273,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { case file_magic::bitcode: case file_magic::elf_relocatable: if (inLib) - files.push_back(make(mbref, "", 0)); + files.push_back(createLazyFile(mbref, "", 0)); else files.push_back(createObjectFile(mbref)); break; @@ -846,14 +846,13 @@ static bool processCallGraphRelocations(SmallVector &symbolIndices, ArrayRef &cgProfile, ObjFile *inputObj) { - symbolIndices.clear(); - const ELFFile &obj = inputObj->getObj(); - ArrayRef> objSections = - CHECK(obj.sections(), "could not retrieve object sections"); - if (inputObj->cgProfileSectionIndex == SHN_UNDEF) return false; + ArrayRef> objSections = + inputObj->template getELFShdrs(); + symbolIndices.clear(); + const ELFFile &obj = inputObj->getObj(); cgProfile = check(obj.template getSectionContentsAsArray( objSections[inputObj->cgProfileSectionIndex])); @@ -1822,17 +1821,21 @@ static void writeDependencyFile() { // symbols of type CommonSymbol. static void replaceCommonSymbols() { llvm::TimeTraceScope timeScope("Replace common symbols"); - for (Symbol *sym : symtab->symbols()) { - auto *s = dyn_cast(sym); - if (!s) + for (ELFFileBase *file : objectFiles) { + if (!file->hasCommonSyms) continue; + for (Symbol *sym : file->getGlobalSymbols()) { + auto *s = dyn_cast(sym); + if (!s) + continue; - auto *bss = make("COMMON", s->size, s->alignment); - bss->file = s->file; - bss->markDead(); - inputSections.push_back(bss); - s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, - /*value=*/0, s->size, bss}); + auto *bss = make("COMMON", s->size, s->alignment); + bss->file = s->file; + bss->markDead(); + inputSections.push_back(bss); + s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, + /*value=*/0, s->size, bss}); + } } } @@ -2120,11 +2123,10 @@ static void redirectSymbols(ArrayRef wrapped) { return; // Update pointers in input files. 
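[Aside, not part of the patch: the driver globals above (config, driver, script, symtab) move from arena-allocated raw pointers to std::unique_ptr, so a second link invocation in the same process starts from freshly constructed state. A simplified sketch of that pattern with hypothetical names:]

#include <cassert>
#include <memory>
#include <string>

struct Configuration {
  std::string outputFile = "a.out";
  bool isPic = false;
};

std::unique_ptr<Configuration> config;   // the only instance, reset per run

void link(bool pic) {
  config = std::make_unique<Configuration>();  // drop any state from a previous run
  config->isPic = pic;
  // ... the rest of the link reads *config ...
}

int main() {
  link(false);
  assert(!config->isPic);
  link(true);                                   // second invocation, fresh Configuration
  assert(config->isPic);
}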
- parallelForEach(objectFiles, [&](InputFile *file) { - MutableArrayRef syms = file->getMutableSymbols(); - for (size_t i = 0, e = syms.size(); i != e; ++i) - if (Symbol *s = map.lookup(syms[i])) - syms[i] = s; + parallelForEach(objectFiles, [&](ELFFileBase *file) { + for (Symbol *&sym : file->getMutableGlobalSymbols()) + if (Symbol *s = map.lookup(sym)) + sym = s; }); // Update pointers in the symbol table. diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h index 96d040041c5a..5961e1f69472 100644 --- a/lld/ELF/Driver.h +++ b/lld/ELF/Driver.h @@ -22,7 +22,7 @@ namespace lld { namespace elf { -extern class LinkerDriver *driver; +extern std::unique_ptr driver; class LinkerDriver { public: diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 0ec748e8f990..ec63d2ef4d6f 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -550,6 +550,22 @@ template void ICF::run() { } }); + // Change Defined symbol's section field to the canonical one. + auto fold = [](Symbol *sym) { + if (auto *d = dyn_cast(sym)) + if (auto *sec = dyn_cast_or_null(d->section)) + if (sec->repl != d->section) { + d->section = sec->repl; + d->folded = true; + } + }; + for (Symbol *sym : symtab->symbols()) + fold(sym); + parallelForEach(objectFiles, [&](ELFFileBase *file) { + for (Symbol *sym : file->getLocalSymbols()) + fold(sym); + }); + // InputSectionDescription::sections is populated by processSectionCommands(). // ICF may fold some input sections assigned to output sections. Remove them. for (SectionCommand *cmd : script->sectionCommands) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index fb1fca1c7f0f..e321b0d82920 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -43,12 +43,12 @@ using namespace lld::elf; bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; -std::vector elf::archiveFiles; -std::vector elf::binaryFiles; -std::vector elf::bitcodeFiles; -std::vector elf::lazyObjFiles; -std::vector elf::objectFiles; -std::vector elf::sharedFiles; +SmallVector elf::archiveFiles; +SmallVector elf::binaryFiles; +SmallVector elf::bitcodeFiles; +SmallVector elf::lazyBitcodeFiles; +SmallVector elf::objectFiles; +SmallVector elf::sharedFiles; std::unique_ptr elf::tar; @@ -186,9 +186,13 @@ template static void doParseFile(InputFile *file) { } // Lazy object file - if (auto *f = dyn_cast(file)) { - lazyObjFiles.push_back(f); - f->parse(); + if (file->lazy) { + if (auto *f = dyn_cast(file)) { + lazyBitcodeFiles.push_back(f); + f->parseLazy(); + } else { + cast>(file)->parseLazy(); + } return; } @@ -366,6 +370,8 @@ template void ELFFileBase::init() { abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; ArrayRef sections = CHECK(obj.sections(), this); + elfShdrs = sections.data(); + numELFShdrs = sections.size(); // Find a symbol table. bool isDSO = @@ -473,8 +479,7 @@ bool ObjFile::shouldMerge(const Elf_Shdr &sec, StringRef name) { // When the option is given, we link "just symbols". The section table is // initialized with null pointers. template void ObjFile::initializeJustSymbols() { - ArrayRef sections = CHECK(this->getObj().sections(), this); - this->sections.resize(sections.size()); + sections.resize(numELFShdrs); } // An ELF object file may contain a `.deplibs` section. 
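[Aside, not part of the patch: a reduced model of the fold step added to ICF::run() above, using stand-in types rather than lld's Symbol/SectionBase hierarchy. Once ICF has pointed a duplicate section's repl at the canonical copy, symbols defined in the duplicate are redirected and marked folded:]

#include <cassert>

struct Section {
  Section *repl = this;    // canonical section; points at itself unless folded
};

struct Symbol {
  Section *section = nullptr;
  bool folded = false;
};

void fold(Symbol &sym) {
  if (Section *sec = sym.section; sec && sec->repl != sec) {
    sym.section = sec->repl;
    sym.folded = true;
  }
}

int main() {
  Section canonical, duplicate;
  duplicate.repl = &canonical;            // ICF decided these two are identical
  Symbol inDup{&duplicate}, inCanon{&canonical};
  fold(inDup);
  fold(inCanon);
  assert(inDup.section == &canonical && inDup.folded);
  assert(inCanon.section == &canonical && !inCanon.folded);
}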
If it exists, the @@ -540,7 +545,7 @@ template void ObjFile::initializeSections(bool ignoreComdats) { const ELFFile &obj = this->getObj(); - ArrayRef objSections = CHECK(obj.sections(), this); + ArrayRef objSections = getELFShdrs(); StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); uint64_t size = objSections.size(); this->sections.resize(size); @@ -1046,62 +1051,62 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // Initialize this->Symbols. this->Symbols is a parallel array as // its corresponding ELF symbol table. template void ObjFile::initializeSymbols() { + ArrayRef sections(this->sections); + SymbolTable &symtab = *elf::symtab; + ArrayRef eSyms = this->getELFSyms(); - this->symbols.resize(eSyms.size()); + symbols.resize(eSyms.size()); SymbolUnion *locals = firstGlobal == 0 ? nullptr : getSpecificAllocSingleton().Allocate(firstGlobal); - for (size_t i = 0; i != firstGlobal; ++i) { + for (size_t i = 0, end = firstGlobal; i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; uint32_t secIdx = getSectionIndex(eSym); - if (secIdx >= this->sections.size()) + if (LLVM_UNLIKELY(secIdx >= sections.size())) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); - if (eSym.getBinding() != STB_LOCAL) + if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) error(toString(this) + ": non-local symbol (" + Twine(i) + - ") found at index < .symtab's sh_info (" + Twine(firstGlobal) + - ")"); + ") found at index < .symtab's sh_info (" + Twine(end) + ")"); - InputSectionBase *sec = this->sections[secIdx]; + InputSectionBase *sec = sections[secIdx]; uint8_t type = eSym.getType(); if (type == STT_FILE) - sourceFile = CHECK(eSym.getName(this->stringTable), this); - if (this->stringTable.size() <= eSym.st_name) + sourceFile = CHECK(eSym.getName(stringTable), this); + if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name)) fatal(toString(this) + ": invalid symbol name offset"); - StringRefZ name = this->stringTable.data() + eSym.st_name; + StringRefZ name = stringTable.data() + eSym.st_name; - this->symbols[i] = reinterpret_cast(locals + i); + symbols[i] = reinterpret_cast(locals + i); if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) - new (this->symbols[i]) - Undefined(this, name, STB_LOCAL, eSym.st_other, type, - /*discardedSecIdx=*/secIdx); + new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, + /*discardedSecIdx=*/secIdx); else - new (this->symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, - eSym.st_value, eSym.st_size, sec); + new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, + eSym.st_value, eSym.st_size, sec); } // Some entries have been filled by LazyObjFile. for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) - if (!this->symbols[i]) - this->symbols[i] = - symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); + if (!symbols[i]) + symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); // Perform symbol resolution on non-local symbols. 
SmallVector undefineds; for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; uint8_t binding = eSym.getBinding(); - if (binding == STB_LOCAL) { + if (LLVM_UNLIKELY(binding == STB_LOCAL)) { errorOrWarn(toString(this) + ": STB_LOCAL symbol (" + Twine(i) + ") found at index >= .symtab's sh_info (" + Twine(firstGlobal) + ")"); continue; } uint32_t secIdx = getSectionIndex(eSym); - if (secIdx >= this->sections.size()) + if (LLVM_UNLIKELY(secIdx >= sections.size())) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); - InputSectionBase *sec = this->sections[secIdx]; + InputSectionBase *sec = sections[secIdx]; uint8_t stOther = eSym.st_other; uint8_t type = eSym.getType(); uint64_t value = eSym.st_value; @@ -1112,12 +1117,13 @@ template void ObjFile::initializeSymbols() { continue; } - Symbol *sym = this->symbols[i]; + Symbol *sym = symbols[i]; const StringRef name = sym->getName(); - if (eSym.st_shndx == SHN_COMMON) { + if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { if (value == 0 || value >= UINT32_MAX) fatal(toString(this) + ": common symbol '" + name + "' has invalid alignment: " + Twine(value)); + hasCommonSyms = true; sym->resolve( CommonSymbol{this, name, binding, stOther, type, value, size}); continue; @@ -1130,15 +1136,14 @@ template void ObjFile::initializeSymbols() { // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { Undefined und{this, name, binding, stOther, type, secIdx}; - // !ArchiveFile::parsed or LazyObjFile::extracted means that the file + // !ArchiveFile::parsed or !LazyObjFile::lazy means that the file // containing this object has not finished processing, i.e. this symbol is // a result of a lazy symbol extract. We should demote the lazy symbol to // an Undefined so that any relocations outside of the group to it will // trigger a discarded section error. if ((sym->symbolKind == Symbol::LazyArchiveKind && !cast(sym->file)->parsed) || - (sym->symbolKind == Symbol::LazyObjectKind && - cast(sym->file)->extracted)) { + (sym->symbolKind == Symbol::LazyObjectKind && !sym->file->lazy)) { sym->replace(und); // Prevent LTO from internalizing the symbol in case there is a // reference to this symbol from this file. @@ -1167,7 +1172,7 @@ template void ObjFile::initializeSymbols() { // being resolved to different files. for (unsigned i : undefineds) { const Elf_Sym &eSym = eSyms[i]; - Symbol *sym = this->symbols[i]; + Symbol *sym = symbols[i]; sym->resolve(Undefined{this, sym->getName(), eSym.getBinding(), eSym.st_other, eSym.getType()}); sym->referenced = true; @@ -1179,8 +1184,9 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&file) file(std::move(file)) {} void ArchiveFile::parse() { + SymbolTable &symtab = *elf::symtab; for (const Archive::Symbol &sym : file->symbols()) - symtab->addSymbol(LazyArchive{*this, sym}); + symtab.addSymbol(LazyArchive{*this, sym}); // Inform a future invocation of ObjFile::initializeSymbols() that this // archive has been processed. @@ -1406,7 +1412,7 @@ template void SharedFile::parse() { ArrayRef dynamicTags; const ELFFile obj = this->getObj(); - ArrayRef sections = CHECK(obj.sections(), this); + ArrayRef sections = getELFShdrs(); const Elf_Shdr *versymSec = nullptr; const Elf_Shdr *verdefSec = nullptr; @@ -1491,6 +1497,7 @@ template void SharedFile::parse() { SmallString<0> versionedNameBuffer; // Add symbols to the symbol table. 
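[Aside, not part of the patch: the SHN_COMMON path above sets hasCommonSyms so replaceCommonSymbols can skip files without tentative definitions. A heavily simplified sketch of common-symbol semantics, stand-in types only: tentative definitions of a name merge by taking the larger size and alignment, and any real definition wins outright:]

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

struct Common { uint64_t size = 0, alignment = 1; bool definedStrongly = false; };

std::map<std::string, Common> names;

void resolveCommon(const std::string &name, uint64_t size, uint64_t align) {
  Common &c = names[name];
  if (c.definedStrongly)
    return;                                  // a real definition already won
  c.size = std::max(c.size, size);
  c.alignment = std::max(c.alignment, align);
}

void resolveDefined(const std::string &name) { names[name].definedStrongly = true; }

int main() {
  resolveCommon("counter", 4, 4);
  resolveCommon("counter", 8, 8);            // another TU tentatively defines it bigger
  assert(names["counter"].size == 8 && names["counter"].alignment == 8);
  resolveDefined("buffer");                  // strong definition beats later commons
  resolveCommon("buffer", 16, 16);
  assert(names["buffer"].size == 0);
}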
+ SymbolTable &symtab = *elf::symtab; ArrayRef syms = this->getGlobalELFSyms(); for (size_t i = 0, e = syms.size(); i != e; ++i) { const Elf_Sym &sym = syms[i]; @@ -1499,7 +1506,7 @@ template void SharedFile::parse() { // symbols in each symbol table, and the index of first non-local symbol // is stored to sh_info. If a local symbol appears after some non-local // symbol, that's a violation of the spec. - StringRef name = CHECK(sym.getName(this->stringTable), this); + StringRef name = CHECK(sym.getName(stringTable), this); if (sym.getBinding() == STB_LOCAL) { warn("found local symbol '" + name + "' in global part of symbol table in file " + toString(this)); @@ -1517,12 +1524,12 @@ template void SharedFile::parse() { toString(this)); continue; } - StringRef verName = this->stringTable.data() + verneeds[idx]; + StringRef verName = stringTable.data() + verneeds[idx]; versionedNameBuffer.clear(); name = saver.save((name + "@" + verName).toStringRef(versionedNameBuffer)); } - Symbol *s = symtab->addSymbol( + Symbol *s = symtab.addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); s->exportDynamic = true; if (s->isUndefined() && sym.getBinding() != STB_WEAK && @@ -1540,9 +1547,9 @@ template void SharedFile::parse() { uint32_t alignment = getAlignment(sections, sym); if (!(versyms[i] & VERSYM_HIDDEN)) { - symtab->addSymbol(SharedSymbol{*this, name, sym.getBinding(), - sym.st_other, sym.getType(), sym.st_value, - sym.st_size, alignment, idx}); + symtab.addSymbol(SharedSymbol{*this, name, sym.getBinding(), sym.st_other, + sym.getType(), sym.st_value, sym.st_size, + alignment, idx}); } // Also add the symbol with the versioned name to handle undefined symbols @@ -1558,13 +1565,13 @@ template void SharedFile::parse() { } StringRef verName = - this->stringTable.data() + + stringTable.data() + reinterpret_cast(verdefs[idx])->getAux()->vda_name; versionedNameBuffer.clear(); name = (name + "@" + verName).toStringRef(versionedNameBuffer); - symtab->addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(), - sym.st_other, sym.getType(), sym.st_value, - sym.st_size, alignment, idx}); + symtab.addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(), + sym.st_other, sym.getType(), sym.st_value, + sym.st_size, alignment, idx}); } } @@ -1630,9 +1637,10 @@ static uint8_t getOsAbi(const Triple &t) { } BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) + uint64_t offsetInArchive, bool lazy) : InputFile(BitcodeKind, mb) { this->archiveName = archiveName; + this->lazy = lazy; std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) @@ -1711,13 +1719,22 @@ template void BitcodeFile::parse() { .second); } - for (const lto::InputFile::Symbol &objSym : obj->symbols()) - symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this)); + symbols.assign(obj->symbols().size(), nullptr); + for (auto it : llvm::enumerate(obj->symbols())) + symbols[it.index()] = + createBitcodeSymbol(keptComdats, it.value(), *this); for (auto l : obj->getDependentLibraries()) addDependentLibrary(l, this); } +void BitcodeFile::parseLazy() { + SymbolTable &symtab = *elf::symtab; + for (const lto::InputFile::Symbol &sym : obj->symbols()) + if (!sym.isUndefined()) + symtab.addSymbol(LazyObject{*this, saver.save(sym.getName())}); +} + void BinaryFile::parse() { ArrayRef data = arrayRefFromStringRef(mb.getBuffer()); auto *section = make(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, @@ -1744,7 +1761,7 @@ void BinaryFile::parse() { 
InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) { if (isBitcode(mb)) - return make(mb, archiveName, offsetInArchive); + return make(mb, archiveName, offsetInArchive, /*lazy=*/false); switch (getELFKind(mb, archiveName)) { case ELF32LEKind: @@ -1760,81 +1777,40 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } } -void LazyObjFile::extract() { - if (extracted) - return; - extracted = true; - - InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); - file->groupId = groupId; - - // Copy symbol vector so that the new InputFile doesn't have to - // insert the same defined symbols to the symbol table again. - file->symbols = std::move(symbols); +InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive) { + if (isBitcode(mb)) + return make(mb, archiveName, offsetInArchive, /*lazy=*/true); - parseFile(file); + auto *file = + cast(createObjectFile(mb, archiveName, offsetInArchive)); + file->lazy = true; + return file; } -template void LazyObjFile::parse() { - using Elf_Sym = typename ELFT::Sym; +template void ObjFile::parseLazy() { + const ArrayRef eSyms = this->getELFSyms(); + SymbolTable &symtab = *elf::symtab; - // A lazy object file wraps either a bitcode file or an ELF file. - if (isBitcode(this->mb)) { - std::unique_ptr obj = - CHECK(lto::InputFile::create(this->mb), this); - for (const lto::InputFile::Symbol &sym : obj->symbols()) { - if (sym.isUndefined()) - continue; - symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())}); - } - return; - } - - if (getELFKind(this->mb, archiveName) != config->ekind) { - error("incompatible file: " + this->mb.getBufferIdentifier()); - return; - } - - // Find a symbol table. - ELFFile obj = check(ELFFile::create(mb.getBuffer())); - ArrayRef sections = CHECK(obj.sections(), this); - - for (const typename ELFT::Shdr &sec : sections) { - if (sec.sh_type != SHT_SYMTAB) - continue; - - // A symbol table is found. - ArrayRef eSyms = CHECK(obj.symbols(&sec), this); - uint32_t firstGlobal = sec.sh_info; - StringRef strtab = CHECK(obj.getStringTableForSymtab(sec, sections), this); - this->symbols.resize(eSyms.size()); - - // Get existing symbols or insert placeholder symbols. - for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) - if (eSyms[i].st_shndx != SHN_UNDEF) - this->symbols[i] = - symtab->insert(CHECK(eSyms[i].getName(strtab), this)); + symbols.resize(eSyms.size()); + for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) + if (eSyms[i].st_shndx != SHN_UNDEF) + symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); - // Replace existing symbols with LazyObject symbols. - // - // resolve() may trigger this->extract() if an existing symbol is an - // undefined symbol. If that happens, this LazyObjFile has served - // its purpose, and we can exit from the loop early. - for (Symbol *sym : this->symbols) { - if (!sym) - continue; + // Replace existing symbols with LazyObject symbols. + // + // resolve() may trigger this->extract() if an existing symbol is an undefined + // symbol. If that happens, this function has served its purpose, and we can + // exit from the loop early. + for (Symbol *sym : makeArrayRef(symbols).slice(firstGlobal)) + if (sym) { sym->resolve(LazyObject{*this, sym->getName()}); - - // If extracted, stop iterating because this->symbols has been transferred - // to the instantiated ObjFile. 
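[Aside, not part of the patch: the parseLazy() path above gives a plain relocatable file the archive-member semantics of --start-lib/--end-lib. In miniature, with hypothetical types: a lazy file only registers the names it defines, and is pulled into the link the first time one of those names is referenced while still undefined:]

#include <cassert>
#include <set>
#include <string>
#include <vector>

struct LazyFile {
  std::vector<std::string> defines;
  bool extracted = false;
};

std::set<std::string> undefined;   // names referenced but not yet defined

void reference(const std::string &name, std::vector<LazyFile> &lazyFiles) {
  undefined.insert(name);
  for (LazyFile &f : lazyFiles) {
    if (f.extracted)
      continue;
    for (const std::string &d : f.defines)
      if (undefined.count(d)) {
        f.extracted = true;        // pull the whole file in, like an archive member
        for (const std::string &def : f.defines)
          undefined.erase(def);
        break;
      }
  }
}

int main() {
  std::vector<LazyFile> lib = {{{"helper", "helper2"}}, {{"unused"}}};
  reference("helper", lib);
  assert(lib[0].extracted && !lib[1].extracted);
  assert(undefined.empty());
}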
- if (extracted) + if (!lazy) return; } - return; - } } -bool LazyObjFile::shouldExtractForCommon(const StringRef &name) { +bool InputFile::shouldExtractForCommon(StringRef name) { if (isBitcode(mb)) return isBitcodeNonCommonDef(mb, name, archiveName); @@ -1855,11 +1831,6 @@ template void BitcodeFile::parse(); template void BitcodeFile::parse(); template void BitcodeFile::parse(); -template void LazyObjFile::parse(); -template void LazyObjFile::parse(); -template void LazyObjFile::parse(); -template void LazyObjFile::parse(); - template class elf::ObjFile; template class elf::ObjFile; template class elf::ObjFile; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 1075029b8df9..d622390fcade 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -41,6 +41,7 @@ namespace elf { using llvm::object::Archive; +class InputSection; class Symbol; // If --reproduce is specified, all input files are written to this tar archive. @@ -90,9 +91,7 @@ class InputFile { // Returns object file symbols. It is a runtime error to call this // function on files of other types. - ArrayRef getSymbols() { return getMutableSymbols(); } - - MutableArrayRef getMutableSymbols() { + ArrayRef getSymbols() const { assert(fileKind == BinaryKind || fileKind == ObjKind || fileKind == BitcodeKind); return symbols; @@ -101,6 +100,10 @@ class InputFile { // Get filename to use for linker script processing. StringRef getNameForScript() const; + // Check if a non-common symbol should be extracted to override a common + // definition. + bool shouldExtractForCommon(StringRef name); + // If not empty, this stores the name of the archive containing this file. // We use this string for creating error messages. SmallString<0> archiveName; @@ -110,12 +113,12 @@ class InputFile { SmallVector symbols; - // Index of MIPS GOT built for this file. - llvm::Optional mipsGotIndex; + // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute + // offsets in PLT call stubs. + InputSection *ppc32Got2 = nullptr; - // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE - // to compute offsets in PLT call stubs. - uint32_t ppc32Got2OutSecOff = 0; + // Index of MIPS GOT built for this file. + uint32_t mipsGotIndex = -1; // groupId is used for --warn-backrefs which is an optional error // checking feature. All files within the same --{start,end}-group or @@ -132,6 +135,11 @@ class InputFile { ELFKind ekind = ELFNoneKind; uint8_t osabi = 0; uint8_t abiVersion = 0; + + // True if this is a relocatable object file/bitcode file between --start-lib + // and --end-lib. + bool lazy = false; + // True if this is an argument for --just-symbols. Usually false. bool justSymbols = false; @@ -176,7 +184,15 @@ class ELFFileBase : public InputFile { ArrayRef getGlobalSymbols() { return llvm::makeArrayRef(symbols).slice(firstGlobal); } + MutableArrayRef getMutableGlobalSymbols() { + return llvm::makeMutableArrayRef(symbols.data(), symbols.size()) + .slice(firstGlobal); + } + template typename ELFT::ShdrRange getELFShdrs() const { + return typename ELFT::ShdrRange( + reinterpret_cast(elfShdrs), numELFShdrs); + } template typename ELFT::SymRange getELFSyms() const { return typename ELFT::SymRange( reinterpret_cast(elfSyms), numELFSyms); @@ -189,10 +205,15 @@ class ELFFileBase : public InputFile { // Initializes this class's member variables. 
template void init(); + StringRef stringTable; + const void *elfShdrs = nullptr; const void *elfSyms = nullptr; + uint32_t numELFShdrs = 0; uint32_t numELFSyms = 0; uint32_t firstGlobal = 0; - StringRef stringTable; + +public: + bool hasCommonSyms = false; }; // .o file. @@ -211,6 +232,7 @@ template class ObjFile : public ELFFileBase { } void parse(bool ignoreComdats = false); + void parseLazy(); StringRef getShtGroupSignature(ArrayRef sections, const Elf_Shdr &sec); @@ -294,36 +316,6 @@ template class ObjFile : public ELFFileBase { llvm::once_flag initDwarf; }; -// LazyObjFile is analogous to ArchiveFile in the sense that -// the file contains lazy symbols. The difference is that -// LazyObjFile wraps a single file instead of multiple files. -// -// This class is used for --start-lib and --end-lib options which -// instruct the linker to link object files between them with the -// archive file semantics. -class LazyObjFile : public InputFile { -public: - LazyObjFile(MemoryBufferRef m, StringRef archiveName, - uint64_t offsetInArchive) - : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { - this->archiveName = archiveName; - } - - static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } - - template void parse(); - void extract(); - - // Check if a non-common symbol should be extracted to override a common - // definition. - bool shouldExtractForCommon(const StringRef &name); - - bool extracted = false; - -private: - uint64_t offsetInArchive; -}; - // An ArchiveFile object represents a .a file. class ArchiveFile : public InputFile { public: @@ -354,9 +346,10 @@ class ArchiveFile : public InputFile { class BitcodeFile : public InputFile { public: BitcodeFile(MemoryBufferRef m, StringRef archiveName, - uint64_t offsetInArchive); + uint64_t offsetInArchive, bool lazy); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } template void parse(); + void parseLazy(); std::unique_ptr obj; }; @@ -406,6 +399,8 @@ class BinaryFile : public InputFile { InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", uint64_t offsetInArchive = 0); +InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive); inline bool isBitcode(MemoryBufferRef mb) { return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; @@ -413,12 +408,12 @@ inline bool isBitcode(MemoryBufferRef mb) { std::string replaceThinLTOSuffix(StringRef path); -extern std::vector archiveFiles; -extern std::vector binaryFiles; -extern std::vector bitcodeFiles; -extern std::vector lazyObjFiles; -extern std::vector objectFiles; -extern std::vector sharedFiles; +extern SmallVector archiveFiles; +extern SmallVector binaryFiles; +extern SmallVector bitcodeFiles; +extern SmallVector lazyBitcodeFiles; +extern SmallVector objectFiles; +extern SmallVector sharedFiles; } // namespace elf } // namespace lld diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 61654b95704f..e1ee3def89f3 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -40,7 +40,7 @@ using namespace llvm::sys; using namespace lld; using namespace lld::elf; -std::vector elf::inputSections; +SmallVector elf::inputSections; DenseSet> elf::ppc64noTocRelax; // Returns a string to construct an error message. 
@@ -163,16 +163,16 @@ template RelsOrRelas InputSectionBase::relsOrRelas() const { if (relSecIdx == 0) return {}; RelsOrRelas ret; - const ELFFile obj = cast(file)->getObj(); - typename ELFT::Shdr shdr = cantFail(obj.sections())[relSecIdx]; + typename ELFT::Shdr shdr = + cast(file)->getELFShdrs()[relSecIdx]; if (shdr.sh_type == SHT_REL) { ret.rels = makeArrayRef(reinterpret_cast( - obj.base() + shdr.sh_offset), + file->mb.getBufferStart() + shdr.sh_offset), shdr.sh_size / sizeof(typename ELFT::Rel)); } else { assert(shdr.sh_type == SHT_RELA); ret.relas = makeArrayRef(reinterpret_cast( - obj.base() + shdr.sh_offset), + file->mb.getBufferStart() + shdr.sh_offset), shdr.sh_size / sizeof(typename ELFT::Rela)); } return ret; @@ -395,6 +395,7 @@ InputSectionBase *InputSection::getRelocatedSection() const { // for each relocation. So we copy relocations one by one. template void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { + const TargetInfo &target = *elf::target; InputSectionBase *sec = getRelocatedSection(); for (const RelTy &rel : rels) { @@ -433,8 +434,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { sec->name != ".gcc_except_table" && sec->name != ".got2" && sec->name != ".toc") { uint32_t secIdx = cast(sym).discardedSecIdx; - Elf_Shdr_Impl sec = - CHECK(file->getObj().sections(), file)[secIdx]; + Elf_Shdr_Impl sec = file->template getELFShdrs()[secIdx]; warn("relocation refers to a discarded section: " + CHECK(file->getObj().getSectionName(sec), file) + "\n>>> referenced by " + getObjMsg(p->r_offset)); @@ -442,7 +442,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { p->setSymbolAndType(0, 0, false); continue; } - SectionBase *section = d->section->repl; + SectionBase *section = d->section; if (!section->isLive()) { p->setSymbolAndType(0, 0, false); continue; @@ -451,10 +451,10 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { int64_t addend = getAddend(rel); const uint8_t *bufLoc = sec->data().begin() + rel.r_offset; if (!RelTy::IsRela) - addend = target->getImplicitAddend(bufLoc, type); + addend = target.getImplicitAddend(bufLoc, type); if (config->emachine == EM_MIPS && - target->getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { + target.getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. By default, // this value has 0x7ff0 offset from a .got section. But // relocatable files produced by a compiler or a linker @@ -471,16 +471,16 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { if (RelTy::IsRela) p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; - else if (config->relocatable && type != target->noneRel) + else if (config->relocatable && type != target.noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && - p->r_addend >= 0x8000) { + p->r_addend >= 0x8000 && sec->file->ppc32Got2) { // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 // indicates that r30 is relative to the input section .got2 // (r_addend>=0x8000), after linking, r30 should be relative to the output // section .got2 . To compensate for the shift, adjust r_addend by - // ppc32Got2OutSecOff. - p->r_addend += sec->file->ppc32Got2OutSecOff; + // ppc32Got->outSecOff. 
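[Aside, not part of the patch: the copyRelocations and relocation hunks above bind *elf::target to a local const reference before their loops, presumably so the global is loaded once instead of on every iteration. A reduced sketch of the pattern, hypothetical names:]

#include <cassert>
#include <cstdint>
#include <vector>

struct TargetInfo {
  int64_t getImplicitAddend(uint8_t v) const { return v; }
};

TargetInfo *target;   // global, set up once per link

int64_t sumAddends(const std::vector<uint8_t> &locs) {
  const TargetInfo &t = *target;   // one load, then direct calls inside the hot loop
  int64_t sum = 0;
  for (uint8_t loc : locs)
    sum += t.getImplicitAddend(loc);
  return sum;
}

int main() {
  TargetInfo ti;
  target = &ti;
  assert(sumAddends({1, 2, 3}) == 6);
}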
+ p->r_addend += sec->file->ppc32Got2->outSecOff; } } } @@ -865,6 +865,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, template void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; + const TargetInfo &target = *elf::target; const bool isDebug = isDebugSection(*this); const bool isDebugLocOrRanges = isDebug && (name == ".debug_loc" || name == ".debug_ranges"); @@ -890,16 +891,16 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { uint8_t *bufLoc = buf + offset; int64_t addend = getAddend(rel); if (!RelTy::IsRela) - addend += target->getImplicitAddend(bufLoc, type); + addend += target.getImplicitAddend(bufLoc, type); Symbol &sym = getFile()->getRelocTargetSym(rel); - RelExpr expr = target->getRelExpr(type, sym, bufLoc); + RelExpr expr = target.getRelExpr(type, sym, bufLoc); if (expr == R_NONE) continue; if (expr == R_SIZE) { - target->relocateNoSym(bufLoc, type, - SignExtend64(sym.getSize() + addend)); + target.relocateNoSym(bufLoc, type, + SignExtend64(sym.getSize() + addend)); continue; } @@ -923,14 +924,14 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // address 0. For bug-compatibilty, we accept them with warnings. We // know Steel Bank Common Lisp as of 2018 have this bug. warn(msg); - target->relocateNoSym( + target.relocateNoSym( bufLoc, type, SignExtend64(sym.getVA(addend - offset - outSecOff))); continue; } if (tombstone || - (isDebug && (type == target->symbolicRel || expr == R_DTPREL))) { + (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) { // Resolve relocations in .debug_* referencing (discarded symbols or ICF // folded section symbols) to a tombstone value. Resolving to addend is // unsatisfactory because the result address range may collide with a @@ -948,10 +949,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // // If the referenced symbol is discarded (made Undefined), or the // section defining the referenced symbol is garbage collected, - // sym.getOutputSection() is nullptr. `ds->section->repl != ds->section` - // catches the ICF folded case. However, resolving a relocation in - // .debug_line to -1 would stop debugger users from setting breakpoints on - // the folded-in function, so exclude .debug_line. + // sym.getOutputSection() is nullptr. `ds->folded` catches the ICF folded + // case. However, resolving a relocation in .debug_line to -1 would stop + // debugger users from setting breakpoints on the folded-in function, so + // exclude .debug_line. // // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value // (base address selection entry), use 1 (which is used by GNU ld for @@ -960,16 +961,15 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // TODO To reduce disruption, we use 0 instead of -1 as the tombstone // value. Enable -1 in a future release. auto *ds = dyn_cast(&sym); - if (!sym.getOutputSection() || - (ds && ds->section->repl != ds->section && !isDebugLine)) { + if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. const uint64_t value = tombstone ? SignExtend64(*tombstone) : (isDebugLocOrRanges ? 
1 : 0); - target->relocateNoSym(bufLoc, type, value); + target.relocateNoSym(bufLoc, type, value); continue; } } - target->relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); + target.relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); } } @@ -1015,6 +1015,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { assert(flags & SHF_ALLOC); const unsigned bits = config->wordsize * 8; + const TargetInfo &target = *elf::target; uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1); for (const Relocation &rel : relocations) { @@ -1033,7 +1034,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { switch (rel.expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - target->relaxGot(bufLoc, rel, targetVA); + target.relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after @@ -1046,7 +1047,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { lastPPCRelaxedRelocOff = offset; if (rel.type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff) break; - target->relaxGot(bufLoc, rel, targetVA); + target.relaxGot(bufLoc, rel, targetVA); break; } case R_PPC64_RELAX_TOC: @@ -1057,25 +1058,25 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { // opportunities but is safe. if (ppc64noTocRelax.count({rel.sym, rel.addend}) || !tryRelaxPPC64TocIndirection(rel, bufLoc)) - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: - target->relaxTlsIeToLe(bufLoc, rel, targetVA); + target.relaxTlsIeToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_LD_TO_LE_ABS: - target->relaxTlsLdToLe(bufLoc, rel, targetVA); + target.relaxTlsLdToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: - target->relaxTlsGdToLe(bufLoc, rel, targetVA); + target.relaxTlsGdToLe(bufLoc, rel, targetVA); break; case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOTPLT: - target->relaxTlsGdToIe(bufLoc, rel, targetVA); + target.relaxTlsGdToIe(bufLoc, rel, targetVA); break; case R_PPC64_CALL: // If this is a call to __tls_get_addr, it may be part of a TLS @@ -1100,10 +1101,10 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { } write32(bufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; default: - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; } } @@ -1116,7 +1117,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { for (const JumpInstrMod &jumpMod : jumpInstrMods) { uint64_t offset = jumpMod.offset; uint8_t *bufLoc = buf + offset; - target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); + target.applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); } } } diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index a5967b500f90..5319830b5d80 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -52,13 +52,6 @@ class SectionBase { StringRef name; - // This pointer points to the "real" instance of this instance. - // Usually Repl == this. However, if ICF merges two sections, - // Repl pointer of one section points to another section. 
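[Aside, not part of the patch: a simplified decision function for the tombstone rule rewritten above, now keyed on the new Defined::folded flag. It omits the .debug_line special case and uses hypothetical parameters; the values follow the comments in the hunk (0 as the conservative default, 1 for .debug_loc/.debug_ranges, or whatever -z dead-reloc-in-nonalloc= forces):]

#include <cassert>
#include <cstdint>
#include <optional>
#include <string>

uint64_t resolveDebugReloc(bool targetDiscardedOrFolded, const std::string &secName,
                           uint64_t realValue, std::optional<uint64_t> forcedTombstone) {
  if (!targetDiscardedOrFolded)
    return realValue;
  if (forcedTombstone)                       // -z dead-reloc-in-nonalloc=<value>
    return *forcedTombstone;
  bool isLocOrRanges = secName == ".debug_loc" || secName == ".debug_ranges";
  return isLocOrRanges ? 1 : 0;              // 1 avoids the reserved base-address selector -1
}

int main() {
  assert(resolveDebugReloc(false, ".debug_info", 0x1000, {}) == 0x1000);
  assert(resolveDebugReloc(true, ".debug_info", 0x1000, {}) == 0);
  assert(resolveDebugReloc(true, ".debug_ranges", 0x1000, {}) == 1);
  assert(resolveDebugReloc(true, ".debug_info", 0x1000,
                           uint64_t{0xffffffffffffffff}) == 0xffffffffffffffff);
}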
So, - // if you need to get a pointer to this instance, do not use - // this but instead this->Repl. - SectionBase *repl; - uint8_t sectionKind : 3; // The next two bit fields are only used by InputSectionBase, but we @@ -102,9 +95,9 @@ class SectionBase { constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, uint32_t entsize, uint32_t alignment, uint32_t type, uint32_t info, uint32_t link) - : name(name), repl(this), sectionKind(sectionKind), bss(false), - keepUnique(false), partition(0), alignment(alignment), flags(flags), - entsize(entsize), type(type), link(link), info(info) {} + : name(name), sectionKind(sectionKind), bss(false), keepUnique(false), + partition(0), alignment(alignment), flags(flags), entsize(entsize), + type(type), link(link), info(info) {} }; // This corresponds to a section of an input file. @@ -367,6 +360,10 @@ class InputSection : public InputSectionBase { template void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef rels); + // Points to the canonical section. If ICF folds two sections, repl pointer of + // one section points to the other. + InputSection *repl = this; + // Used by ICF. uint32_t eqClass[2] = {0, 0}; @@ -394,7 +391,7 @@ inline bool isDebugSection(const InputSectionBase &sec) { } // The list of all input sections. -extern std::vector inputSections; +extern SmallVector inputSections; // The set of TOC entries (.toc + addend) for which we should not apply // toc-indirect to toc-relative relaxation. const Symbol * refers to the diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 46dc77a6789c..65b943c4a54c 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -204,6 +204,8 @@ BitcodeCompiler::BitcodeCompiler() { config->ltoPartitions); // Initialize usedStartStop. + if (bitcodeFiles.empty()) + return; for (Symbol *sym : symtab->symbols()) { StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) @@ -278,8 +280,8 @@ void BitcodeCompiler::add(BitcodeFile &f) { // This is needed because this is what GNU gold plugin does and we have a // distributed build system that depends on that behavior. static void thinLTOCreateEmptyIndexFiles() { - for (LazyObjFile *f : lazyObjFiles) { - if (f->extracted || !isBitcode(f->mb)) + for (BitcodeFile *f : lazyBitcodeFiles) { + if (!f->lazy) continue; std::string path = replaceThinLTOSuffix(getThinLTOOutputFile(f->getName())); std::unique_ptr os = openFile(path + ".thinlto.bc"); diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 782a658fdaed..e8f2ce4fdf1f 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -47,7 +47,7 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -LinkerScript *elf::script; +std::unique_ptr elf::script; static bool isSectionPrefix(StringRef prefix, StringRef name) { return name.consume_front(prefix) && (name.empty() || name[0] == '.'); @@ -560,22 +560,22 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, return ret; } -void LinkerScript::discard(InputSectionBase *s) { - if (s == in.shStrTab || s == mainPart->relrDyn) - error("discarding " + s->name + " section is not allowed"); +void LinkerScript::discard(InputSectionBase &s) { + if (&s == in.shStrTab || &s == mainPart->relrDyn) + error("discarding " + s.name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since // they are synthesized sections, we need to handle them differently than // other regular sections. 
- if (s == mainPart->gnuHashTab) + if (&s == mainPart->gnuHashTab) mainPart->gnuHashTab = nullptr; - if (s == mainPart->hashTab) + if (&s == mainPart->hashTab) mainPart->hashTab = nullptr; - s->markDead(); - s->parent = nullptr; - for (InputSection *ds : s->dependentSections) - discard(ds); + s.markDead(); + s.parent = nullptr; + for (InputSection *sec : s.dependentSections) + discard(*sec); } void LinkerScript::discardSynthetic(OutputSection &outCmd) { @@ -589,7 +589,7 @@ void LinkerScript::discardSynthetic(OutputSection &outCmd) { std::vector matches = computeInputSections(isd, secs); for (InputSectionBase *s : matches) - discard(s); + discard(*s); } } } @@ -618,7 +618,7 @@ void LinkerScript::processSectionCommands() { // Any input section assigned to it is discarded. if (osec->name == "/DISCARD/") { for (InputSectionBase *s : v) - discard(s); + discard(*s); discardSynthetic(*osec); osec->commands.clear(); return false; @@ -1338,7 +1338,7 @@ std::vector LinkerScript::createPhdrs() { // Process PHDRS and FILEHDR keywords because they are not // real output sections and cannot be added in the following loop. for (const PhdrsCommand &cmd : phdrsCommands) { - PhdrEntry *phdr = make(cmd.type, cmd.flags ? *cmd.flags : PF_R); + PhdrEntry *phdr = make(cmd.type, cmd.flags.getValueOr(PF_R)); if (cmd.hasFilehdr) phdr->add(Out::elfHeader); diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index badc4d126be8..f385c8320978 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -312,7 +312,7 @@ class LinkerScript final { bool hasPhdrsCommands() { return !phdrsCommands.empty(); } uint64_t getDot() { return dot; } - void discard(InputSectionBase *s); + void discard(InputSectionBase &s); ExprValue getSymbolValue(StringRef name, const Twine &loc); @@ -366,7 +366,7 @@ class LinkerScript final { std::vector orphanSections; }; -extern LinkerScript *script; +extern std::unique_ptr script; } // end namespace elf } // end namespace lld diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 1a17f3bbfdc5..b63f2beb9dcb 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -244,8 +244,6 @@ template void MarkLive::run() { for (StringRef s : script->referencedSymbols) markSymbol(symtab->find(s)); - // Preserve special sections and those which are specified in linker - // script KEEP command. for (InputSectionBase *sec : inputSections) { // Mark .eh_frame sections as live because there are usually no relocations // that point to .eh_frames. Otherwise, the garbage collector would drop @@ -259,6 +257,7 @@ template void MarkLive::run() { scanEhFrameSection(*eh, rels.rels); else if (rels.relas.size()) scanEhFrameSection(*eh, rels.relas); + continue; } if (sec->flags & SHF_GNU_RETAIN) { @@ -268,6 +267,39 @@ template void MarkLive::run() { if (sec->flags & SHF_LINK_ORDER) continue; + // Usually, non-SHF_ALLOC sections are not removed even if they are + // unreachable through relocations because reachability is not a good signal + // whether they are garbage or not (e.g. there is usually no section + // referring to a .comment section, but we want to keep it.) When a + // non-SHF_ALLOC section is retained, we also retain sections dependent on + // it. + // + // Note on SHF_LINK_ORDER: Such sections contain metadata and they + // have a reverse dependency on the InputSection they are linked with. + // We are able to garbage collect them. + // + // Note on SHF_REL{,A}: Such sections reach here only when -r + // or --emit-reloc were given. 
And they are subject of garbage + // collection because, if we remove a text section, we also + // remove its relocation section. + // + // Note on nextInSectionGroup: The ELF spec says that group sections are + // included or omitted as a unit. We take the interpretation that: + // + // - Group members (nextInSectionGroup != nullptr) are subject to garbage + // collection. + // - Groups members are retained or discarded as a unit. + if (!(sec->flags & SHF_ALLOC)) { + bool isRel = sec->type == SHT_REL || sec->type == SHT_RELA; + if (!isRel && !sec->nextInSectionGroup) { + sec->markLive(); + for (InputSection *isec : sec->dependentSections) + isec->markLive(); + } + } + + // Preserve special sections and those which are specified in linker + // script KEEP command. if (isReserved(sec) || script->shouldKeep(sec)) { enqueue(sec, 0); } else if ((!config->zStartStopGC || sec->name.startswith("__libc_")) && @@ -349,46 +381,6 @@ template void elf::markLive() { return; } - // Otherwise, do mark-sweep GC. - // - // The --gc-sections option works only for SHF_ALLOC sections (sections that - // are memory-mapped at runtime). So we can unconditionally make non-SHF_ALLOC - // sections alive except SHF_LINK_ORDER, SHT_REL/SHT_RELA sections, and - // sections in a group. - // - // Usually, non-SHF_ALLOC sections are not removed even if they are - // unreachable through relocations because reachability is not a good signal - // whether they are garbage or not (e.g. there is usually no section referring - // to a .comment section, but we want to keep it.) When a non-SHF_ALLOC - // section is retained, we also retain sections dependent on it. - // - // Note on SHF_LINK_ORDER: Such sections contain metadata and they - // have a reverse dependency on the InputSection they are linked with. - // We are able to garbage collect them. - // - // Note on SHF_REL{,A}: Such sections reach here only when -r - // or --emit-reloc were given. And they are subject of garbage - // collection because, if we remove a text section, we also - // remove its relocation section. - // - // Note on nextInSectionGroup: The ELF spec says that group sections are - // included or omitted as a unit. We take the interpretation that: - // - // - Group members (nextInSectionGroup != nullptr) are subject to garbage - // collection. - // - Groups members are retained or discarded as a unit. - for (InputSectionBase *sec : inputSections) { - bool isAlloc = (sec->flags & SHF_ALLOC); - bool isLinkOrder = (sec->flags & SHF_LINK_ORDER); - bool isRel = (sec->type == SHT_REL || sec->type == SHT_RELA); - - if (!isAlloc && !isLinkOrder && !isRel && !sec->nextInSectionGroup) { - sec->markLive(); - for (InputSection *isec : sec->dependentSections) - isec->markLive(); - } - } - // Follow the graph to mark all live sections. 
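The block moved into MarkLive<ELFT>::run() above keeps every non-SHF_ALLOC section except relocation sections and group members; the surrounding comment explains why. The same rule as a standalone predicate, with hypothetical names and the standard ELF constant values (a sketch, not lld's code):

// [Editorial sketch - not part of the patch] Simplified flag/type constants
// and a hypothetical Sec struct; only the predicate mirrors the hunk above.
#include <cstdint>

constexpr uint64_t kShfAlloc = 0x2;            // SHF_ALLOC
constexpr uint32_t kShtRela = 4, kShtRel = 9;  // SHT_RELA, SHT_REL

struct Sec {
  uint64_t flags;
  uint32_t type;
  bool inGroup; // stands in for nextInSectionGroup != nullptr
};

// Non-SHF_ALLOC sections are kept unconditionally unless they are relocation
// sections (present only with -r/--emit-relocs) or members of a section
// group, both of which follow their dependee's liveness instead.
bool retainUnconditionally(const Sec &s) {
  if (s.flags & kShfAlloc)
    return false;
  bool isRel = s.type == kShtRel || s.type == kShtRela;
  return !isRel && !s.inGroup;
}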
for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart) MarkLive(curPart).run(); diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 71ef547a5f38..4a03ac387814 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -40,7 +40,7 @@ OutputSection *Out::preinitArray; OutputSection *Out::initArray; OutputSection *Out::finiArray; -std::vector elf::outputSections; +SmallVector elf::outputSections; uint32_t OutputSection::getPhdrFlags() const { uint32_t ret = 0; diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index a5b05cf28aa8..fb3eb0059909 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -138,7 +138,7 @@ struct Out { uint64_t getHeaderSize(); -extern std::vector outputSections; +extern llvm::SmallVector outputSections; } // namespace elf } // namespace lld diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 79ba3194d73b..719386e2e71f 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -295,12 +295,12 @@ static SmallSet getSymbolsAt(SharedSymbol &ss) { // in .bss and in the case of a canonical plt entry it is in .plt. This function // replaces the existing symbol with a Defined pointing to the appropriate // location. -static void replaceWithDefined(Symbol &sym, SectionBase *sec, uint64_t value, +static void replaceWithDefined(Symbol &sym, SectionBase &sec, uint64_t value, uint64_t size) { Symbol old = sym; sym.replace(Defined{sym.file, sym.getName(), sym.binding, sym.stOther, - sym.type, value, size, sec}); + sym.type, value, size, &sec}); sym.pltIndex = old.pltIndex; sym.gotIndex = old.gotIndex; @@ -379,9 +379,9 @@ template static void addCopyRelSymbolImpl(SharedSymbol &ss) { // dynamic symbol for each one. This causes the copy relocation to correctly // interpose any aliases. for (SharedSymbol *sym : getSymbolsAt(ss)) - replaceWithDefined(*sym, sec, 0, sym->size); + replaceWithDefined(*sym, *sec, 0, sym->size); - mainPart->relaDyn->addSymbolReloc(target->copyRel, sec, 0, ss); + mainPart->relaDyn->addSymbolReloc(target->copyRel, *sec, 0, ss); } static void addCopyRelSymbol(SharedSymbol &ss) { @@ -472,8 +472,8 @@ static std::string maybeReportDiscarded(Undefined &sym) { if (!file || !sym.discardedSecIdx || file->getSections()[sym.discardedSecIdx] != &InputSection::discarded) return ""; - ArrayRef> objSections = - CHECK(file->getObj().sections(), file); + ArrayRef objSections = + file->template getELFShdrs(); std::string msg; if (sym.type == ELF::STT_SECTION) { @@ -833,10 +833,10 @@ class OffsetGetter { }; } // namespace -static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, +static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType type) { - Partition &part = isec->getPartition(); + Partition &part = isec.getPartition(); // Add a relative relocation. If relrDyn section is enabled, and the // relocation offset is guaranteed to be even, add the relocation to @@ -844,9 +844,9 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, // relrDyn sections don't support odd offsets. Also, relrDyn sections // don't store the addend values, so we must write it to the relocated // address. 
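The relrDyn comment above reduces to a parity-and-alignment test; the hunk that follows rewrites lld's version of it to take a reference. A minimal sketch with hypothetical names:

// [Editorial sketch - not part of the patch] Packed relative relocations
// (RELR) cannot encode odd offsets and carry no addend, so a relative
// relocation is only routed there when the offset is provably even; the
// addend then has to be pre-written into the section contents instead.
#include <cstdint>

bool canUseRelr(bool relrEnabled, uint32_t sectionAlignment,
                uint64_t offsetInSec) {
  return relrEnabled && sectionAlignment >= 2 && offsetInSec % 2 == 0;
}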
- if (part.relrDyn && isec->alignment >= 2 && offsetInSec % 2 == 0) { - isec->relocations.push_back({expr, type, offsetInSec, addend, &sym}); - part.relrDyn->relocs.push_back({isec, offsetInSec}); + if (part.relrDyn && isec.alignment >= 2 && offsetInSec % 2 == 0) { + isec.relocations.push_back({expr, type, offsetInSec, addend, &sym}); + part.relrDyn->relocs.push_back({&isec, offsetInSec}); return; } part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym, @@ -854,14 +854,14 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, } template -static void addPltEntry(PltSection *plt, GotPltSection *gotPlt, - RelocationBaseSection *rel, RelType type, Symbol &sym) { - plt->addEntry(sym); - gotPlt->addEntry(sym); - rel->addReloc({type, gotPlt, sym.getGotPltOffset(), - sym.isPreemptible ? DynamicReloc::AgainstSymbol - : DynamicReloc::AddendOnlyWithTargetVA, - sym, 0, R_ABS}); +static void addPltEntry(PltSection &plt, GotPltSection &gotPlt, + RelocationBaseSection &rel, RelType type, Symbol &sym) { + plt.addEntry(sym); + gotPlt.addEntry(sym); + rel.addReloc({type, &gotPlt, sym.getGotPltOffset(), + sym.isPreemptible ? DynamicReloc::AgainstSymbol + : DynamicReloc::AddendOnlyWithTargetVA, + sym, 0, R_ABS}); } static void addGotEntry(Symbol &sym) { @@ -880,7 +880,7 @@ static void addGotEntry(Symbol &sym) { if (!config->isPic || isAbsolute(sym)) in.got->relocations.push_back({R_ABS, target->symbolicRel, off, 0, &sym}); else - addRelativeReloc(in.got, off, sym, 0, R_ABS, target->symbolicRel); + addRelativeReloc(*in.got, off, sym, 0, R_ABS, target->symbolicRel); } static void addTpOffsetGotEntry(Symbol &sym) { @@ -891,7 +891,7 @@ static void addTpOffsetGotEntry(Symbol &sym) { return; } mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( - target->tlsGotRel, in.got, off, sym, target->symbolicRel); + target->tlsGotRel, *in.got, off, sym, target->symbolicRel); } // Return true if we can define a symbol in the executable that @@ -1019,12 +1019,12 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, if (canWrite) { RelType rel = target->getDynRel(type); if (expr == R_GOT || (rel == target->symbolicRel && !sym.isPreemptible)) { - addRelativeReloc(&sec, offset, sym, addend, expr, type); + addRelativeReloc(sec, offset, sym, addend, expr, type); return; } else if (rel != 0) { if (config->emachine == EM_MIPS && rel == target->symbolicRel) rel = target->relativeRel; - sec.getPartition().relaDyn->addSymbolReloc(rel, &sec, offset, sym, addend, + sec.getPartition().relaDyn->addSymbolReloc(rel, sec, offset, sym, addend, type); // MIPS ABI turns using of GOT and dynamic relocations inside out. @@ -1145,10 +1145,9 @@ static unsigned handleMipsTlsRelocation(RelType type, Symbol &sym, // symbol in TLS block. // // Returns the number of relocations processed. 
-template -static unsigned -handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, - typename ELFT::uint offset, int64_t addend, RelExpr expr) { +static unsigned handleTlsRelocation(RelType type, Symbol &sym, + InputSectionBase &c, uint64_t offset, + int64_t addend, RelExpr expr) { if (!sym.isTls()) return 0; @@ -1158,13 +1157,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof(expr) && config->shared) { - if (in.got->addDynTlsEntry(sym)) { - uint64_t off = in.got->getGlobalDynOffset(sym); - mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( - target->tlsDescRel, in.got, off, sym, target->tlsDescRel); - } - if (expr != R_TLSDESC_CALL) + if (expr != R_TLSDESC_CALL) { + sym.needsTlsDesc = true; c.relocations.push_back({expr, type, offset, addend, &sym}); + } return 1; } @@ -1200,14 +1196,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, } if (expr == R_TLSLD_HINT) return 1; - if (in.got->addTlsIndex()) { - if (isLocalInExecutable) - in.got->relocations.push_back( - {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); - else - mainPart->relaDyn->addReloc( - {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); - } + sym.needsTlsLd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1223,12 +1212,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // Local-Dynamic sequence where offset of tls variable relative to dynamic // thread pointer is stored in the got. This cannot be relaxed to Local-Exec. if (expr == R_TLSLD_GOT_OFF) { - if (!sym.isInGot()) { - in.got->addEntry(sym); - uint64_t off = sym.getGotOffset(); - in.got->relocations.push_back( - {R_ABS, target->tlsOffsetRel, off, 0, &sym}); - } + sym.needsGotDtprel = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1236,27 +1220,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof(expr)) { if (!toExecRelax) { - if (in.got->addDynTlsEntry(sym)) { - uint64_t off = in.got->getGlobalDynOffset(sym); - - if (isLocalInExecutable) - // Write one to the GOT slot. - in.got->relocations.push_back( - {R_ADDEND, target->symbolicRel, off, 1, &sym}); - else - mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, in.got, - off, sym); - - // If the symbol is preemptible we need the dynamic linker to write - // the offset too. - uint64_t offsetOff = off + config->wordsize; - if (sym.isPreemptible) - mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, in.got, - offsetOff, sym); - else - in.got->relocations.push_back( - {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); - } + sym.needsTlsGd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1264,14 +1228,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. 
if (sym.isPreemptible) { + sym.needsTlsGdToIe = true; c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset, addend, &sym}); - if (!sym.isInGot()) { - in.got->addEntry(sym); - mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, in.got, - sym.getGotOffset(), sym); - } } else { c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_LE), type, offset, @@ -1288,11 +1248,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, c.relocations.push_back( {R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { - if (!sym.isInGot()) - addTpOffsetGotEntry(sym); + sym.needsTlsIe = true; // R_GOT needs a relative relocation for PIC on i386 and Hexagon. if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type)) - addRelativeReloc(&c, offset, sym, addend, expr, type); + addRelativeReloc(c, offset, sym, addend, expr, type); else c.relocations.push_back({expr, type, offset, addend, &sym}); } @@ -1304,7 +1263,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, template static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, - RelTy *start, RelTy *end) { + RelTy *end) { const RelTy &rel = *i; uint32_t symIndex = rel.getSymbol(config->isMips64EL); Symbol &sym = sec.getFile()->getSymbol(symIndex); @@ -1394,8 +1353,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, getLocation(sec, sym, offset)); return; } - } else if (unsigned processed = handleTlsRelocation( - type, sym, sec, offset, addend, expr)) { + } else if (unsigned processed = + handleTlsRelocation(type, sym, sec, offset, addend, expr)) { i += (processed - 1); return; } @@ -1430,7 +1389,7 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // direct relocation on through. if (sym.isGnuIFunc() && config->zIfuncNoplt) { sym.exportDynamic = true; - mainPart->relaDyn->addSymbolReloc(type, &sec, offset, sym, addend, type); + mainPart->relaDyn->addSymbolReloc(type, sec, offset, sym, addend, type); return; } @@ -1511,7 +1470,7 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { rels = sortRels(rels, storage); for (auto i = rels.begin(), end = rels.end(); i != end;) - scanReloc(sec, getOffset, i, rels.begin(), end); + scanReloc(sec, getOffset, i, end); // Sort relocations by offset for more efficient searching for // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. @@ -1585,7 +1544,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym) { // may alter section/value, so create a copy of the symbol to make // section/value fixed. 
auto *directSym = makeDefined(cast(sym)); - addPltEntry(in.iplt, in.igotPlt, in.relaIplt, target->iRelativeRel, + addPltEntry(*in.iplt, *in.igotPlt, *in.relaIplt, target->iRelativeRel, *directSym); sym.pltIndex = directSym->pltIndex; @@ -1615,7 +1574,7 @@ void elf::postScanRelocations() { if (sym.needsGot) addGotEntry(sym); if (sym.needsPlt) - addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); + addPltEntry(*in.plt, *in.gotPlt, *in.relaPlt, target->pltRel, sym); if (sym.needsCopy) { if (sym.isObject()) { addCopyRelSymbol(cast(sym)); @@ -1626,18 +1585,73 @@ void elf::postScanRelocations() { assert(sym.isFunc() && sym.needsPlt); if (!sym.isDefined()) { replaceWithDefined( - sym, in.plt, + sym, *in.plt, target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0); sym.needsCopy = true; if (config->emachine == EM_PPC) { // PPC32 canonical PLT entries are at the beginning of .glink cast(sym).value = in.plt->headerSize; in.plt->headerSize += 16; - cast(in.plt)->canonical_plts.push_back(&sym); + cast(*in.plt).canonical_plts.push_back(&sym); } } } } + + if (!sym.isTls()) + return; + bool isLocalInExecutable = !sym.isPreemptible && !config->shared; + + if (sym.needsTlsDesc) { + in.got->addDynTlsEntry(sym); + mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( + target->tlsDescRel, *in.got, in.got->getGlobalDynOffset(sym), sym, + target->tlsDescRel); + } + if (sym.needsTlsGd && !sym.needsTlsDesc) { + // TODO Support mixed TLSDESC and TLS GD. + in.got->addDynTlsEntry(sym); + uint64_t off = in.got->getGlobalDynOffset(sym); + if (isLocalInExecutable) + // Write one to the GOT slot. + in.got->relocations.push_back( + {R_ADDEND, target->symbolicRel, off, 1, &sym}); + else + mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *in.got, + off, sym); + + // If the symbol is preemptible we need the dynamic linker to write + // the offset too. + uint64_t offsetOff = off + config->wordsize; + if (sym.isPreemptible) + mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *in.got, + offsetOff, sym); + else + in.got->relocations.push_back( + {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); + } + if (sym.needsTlsGdToIe) { + in.got->addEntry(sym); + mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, + sym.getGotOffset(), sym); + } + + if (sym.needsTlsLd && in.got->addTlsIndex()) { + if (isLocalInExecutable) + in.got->relocations.push_back( + {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); + else + mainPart->relaDyn->addReloc( + {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); + } + if (sym.needsGotDtprel) { + in.got->addEntry(sym); + in.got->relocations.push_back( + {R_ABS, target->tlsOffsetRel, sym.getGotOffset(), 0, &sym}); + } + + if (sym.needsTlsIe && !sym.needsTlsGdToIe) + addTpOffsetGotEntry(sym); }; for (Symbol *sym : symtab->symbols()) fn(*sym); @@ -2004,8 +2018,8 @@ std::pair ThunkCreator::getThunk(InputSection *isec, // non-Thunk target, so we cannot fold offset + addend. 
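Stepping back to the postScanRelocations() hunks above: the TLS paths in handleTlsRelocation() no longer allocate GOT slots while scanning; they only set per-symbol needsTls* flags, and a single pass over the symbol table materializes the entries afterwards. A toy model of that two-phase pattern, with invented names rather than lld's types:

// [Editorial sketch - not part of the patch] The scan records what each
// symbol needs; a post-pass allocates slots, so duplicate relocations are
// free and layout depends on symbol order, not relocation order.
#include <vector>

struct Sym {
  bool needsTlsGd = false;
  bool needsTlsIe = false;
  int gotIndex = -1;
};

// Phase 1: per-relocation scan just sets flags (cheap, idempotent).
void scanOne(Sym &s, bool isGd) {
  if (isGd)
    s.needsTlsGd = true;
  else
    s.needsTlsIe = true;
}

// Phase 2: one pass over all symbols materializes the GOT entries.
void postScan(std::vector<Sym *> &symbols, int &nextGotSlot) {
  for (Sym *s : symbols) {
    if (s->needsTlsGd) {          // GD needs a module/offset pair
      s->gotIndex = nextGotSlot;
      nextGotSlot += 2;
    } else if (s->needsTlsIe) {   // IE needs a single TP-offset slot
      s->gotIndex = nextGotSlot++;
    }
  }
}

Because the flags are idempotent, repeated relocations against the same symbol cost nothing, and the resulting GOT layout follows symbol-table order — which is why several TLS tests later in the diff update their expected offsets.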
if (auto *d = dyn_cast(rel.sym)) if (!d->isInPlt() && d->section) - thunkVec = &thunkedSymbolsBySectionAndAddend[{ - {d->section->repl, d->value}, keyAddend}]; + thunkVec = &thunkedSymbolsBySectionAndAddend[{{d->section, d->value}, + keyAddend}]; if (!thunkVec) thunkVec = &thunkedSymbols[{rel.sym, keyAddend}]; @@ -2159,7 +2173,7 @@ void elf::hexagonTLSSymbolUpdate(ArrayRef outputSections) { for (Relocation &rel : isec->relocations) if (rel.sym->type == llvm::ELF::STT_TLS && rel.expr == R_PLT_PC) { if (needEntry) { - addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, + addPltEntry(*in.plt, *in.gotPlt, *in.relaPlt, target->pltRel, *sym); needEntry = false; } diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index ce77f9233fea..a12c5f22c4fe 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -29,7 +29,7 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -SymbolTable *elf::symtab; +std::unique_ptr elf::symtab; void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { // Redirect __real_foo to the original foo and foo to the original __wrap_foo. @@ -70,11 +70,8 @@ Symbol *SymbolTable::insert(StringRef name) { stem = name.take_front(pos); auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()}); - int &symIndex = p.first->second; - bool isNew = p.second; - - if (!isNew) { - Symbol *sym = symVector[symIndex]; + if (!p.second) { + Symbol *sym = symVector[p.first->second]; if (stem.size() != name.size()) sym->setName(name); return sym; @@ -133,7 +130,7 @@ static bool canBeVersioned(const Symbol &sym) { // other than trying to match a pattern against all demangled symbols. // So, if "extern C++" feature is used, we need to demangle all known // symbols. -StringMap> &SymbolTable::getDemangledSyms() { +StringMap> &SymbolTable::getDemangledSyms() { if (!demangledSyms) { demangledSyms.emplace(); std::string demangled; @@ -154,7 +151,7 @@ StringMap> &SymbolTable::getDemangledSyms() { return *demangledSyms; } -std::vector SymbolTable::findByVersion(SymbolVersion ver) { +SmallVector SymbolTable::findByVersion(SymbolVersion ver) { if (ver.isExternCpp) return getDemangledSyms().lookup(ver.name); if (Symbol *sym = find(ver.name)) @@ -163,9 +160,9 @@ std::vector SymbolTable::findByVersion(SymbolVersion ver) { return {}; } -std::vector SymbolTable::findAllByVersion(SymbolVersion ver, - bool includeNonDefault) { - std::vector res; +SmallVector SymbolTable::findAllByVersion(SymbolVersion ver, + bool includeNonDefault) { + SmallVector res; SingleStringMatcher m(ver.name); auto check = [&](StringRef name) { size_t pos = name.find('@'); @@ -191,8 +188,8 @@ std::vector SymbolTable::findAllByVersion(SymbolVersion ver, } void SymbolTable::handleDynamicList() { + SmallVector syms; for (SymbolVersion &ver : config->dynamicList) { - std::vector syms; if (ver.hasWildcard) syms = findAllByVersion(ver, /*includeNonDefault=*/true); else @@ -209,7 +206,7 @@ bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, StringRef versionName, bool includeNonDefault) { // Get a list of symbols which we need to assign the version to. - std::vector syms = findByVersion(ver); + SmallVector syms = findByVersion(ver); auto getName = [](uint16_t ver) -> std::string { if (ver == VER_NDX_LOCAL) @@ -264,7 +261,6 @@ void SymbolTable::scanVersionScript() { SmallString<128> buf; // First, we assign versions to exact matching symbols, // i.e. version definitions not containing any glob meta-characters. 
- std::vector syms; for (VersionDefinition &v : config->versionDefinitions) { auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { bool found = diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 54c4b1169ed1..84d93a3dc786 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -35,8 +35,9 @@ class SymbolTable { struct FilterOutPlaceholder { bool operator()(Symbol *S) const { return !S->isPlaceholder(); } }; - using iterator = llvm::filter_iterator::const_iterator, - FilterOutPlaceholder>; + using iterator = + llvm::filter_iterator::const_iterator, + FilterOutPlaceholder>; public: llvm::iterator_range symbols() const { @@ -64,11 +65,11 @@ class SymbolTable { llvm::DenseMap comdatGroups; private: - std::vector findByVersion(SymbolVersion ver); - std::vector findAllByVersion(SymbolVersion ver, - bool includeNonDefault); + SmallVector findByVersion(SymbolVersion ver); + SmallVector findAllByVersion(SymbolVersion ver, + bool includeNonDefault); - llvm::StringMap> &getDemangledSyms(); + llvm::StringMap> &getDemangledSyms(); bool assignExactVersion(SymbolVersion ver, uint16_t versionId, StringRef versionName, bool includeNonDefault); void assignWildcardVersion(SymbolVersion ver, uint16_t versionId, @@ -82,16 +83,16 @@ class SymbolTable { // FIXME: Experiment with passing in a custom hashing or sorting the symbols // once symbol resolution is finished. llvm::DenseMap symMap; - std::vector symVector; + SmallVector symVector; // A map from demangled symbol names to their symbol objects. // This mapping is 1:N because two symbols with different versions // can have the same name. We use this map to handle "extern C++ {}" // directive in version scripts. - llvm::Optional>> demangledSyms; + llvm::Optional>> demangledSyms; }; -extern SymbolTable *symtab; +extern std::unique_ptr symtab; } // namespace elf } // namespace lld diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index f00d3217a6af..20301497a059 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -78,7 +78,6 @@ static uint64_t getSymVA(const Symbol &sym, int64_t addend) { return d.value; assert(isec != &InputSection::discarded); - isec = isec->repl; uint64_t offset = d.value; @@ -256,10 +255,12 @@ void Symbol::parseSymbolVersion() { } void Symbol::extract() const { - if (auto *sym = dyn_cast(this)) + if (auto *sym = dyn_cast(this)) { cast(sym->file)->extract(sym->sym); - else - cast(this->file)->extract(); + } else if (file->lazy) { + file->lazy = false; + parseFile(file); + } } MemoryBufferRef LazyArchive::getMemberBuffer() { @@ -346,7 +347,7 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { report(": unable to order absolute symbol: "); else if (d && isa(d->section)) report(": unable to order synthetic symbol: "); - else if (d && !d->section->repl->isLive()) + else if (d && !d->section->isLive()) report(": unable to order discarded symbol: "); } @@ -549,7 +550,7 @@ void Symbol::resolveUndefined(const Undefined &other) { } // Undefined symbols in a SharedFile do not change the binding. 
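The SymbolTable::insert() cleanup above relies on the usual insert-or-lookup idiom: the pair returned by the map's insert() already says whether the key was new and where its slot is, so no separate isNew/symIndex locals are needed. A minimal sketch with std::unordered_map standing in for llvm::DenseMap and invented names:

// [Editorial sketch - not part of the patch] One hash probe decides between
// "already present" and "newly inserted"; the map stores an index into a
// side vector, as the hunk above does with symVector.
#include <string>
#include <unordered_map>
#include <vector>

struct Entry { std::string name; };

Entry &getOrCreate(std::unordered_map<std::string, size_t> &map,
                   std::vector<Entry> &pool, const std::string &name) {
  auto p = map.insert({name, pool.size()});
  if (!p.second)                 // already present: reuse the stored index
    return pool[p.first->second];
  pool.push_back({name});        // newly inserted: the reserved index is valid
  return pool.back();
}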
- if (dyn_cast_or_null(other.file)) + if (isa_and_nonnull(other.file)) return; if (isUndefined() || isShared()) { @@ -607,7 +608,7 @@ int Symbol::compare(const Symbol *other) const { auto *oldSym = cast(this); auto *newSym = cast(other); - if (dyn_cast_or_null(other->file)) + if (isa_and_nonnull(other->file)) return 0; if (!oldSym->section && !newSym->section && oldSym->value == newSym->value && @@ -711,8 +712,7 @@ template void Symbol::resolveLazy(const LazyT &other) { return; } } else if (auto *loSym = dyn_cast(&other)) { - LazyObjFile *obj = cast(loSym->file); - if (obj->shouldExtractForCommon(loSym->getName())) { + if (loSym->file->shouldExtractForCommon(loSym->getName())) { replaceCommon(*this, other); return; } diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index b0b7832135a7..27c36eedce80 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -248,8 +248,10 @@ class Symbol { exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false), canInline(false), referenced(false), traced(false), isInIplt(false), gotInIgot(false), isPreemptible(false), used(!config->gcSections), - needsTocRestore(false), scriptDefined(false), needsCopy(false), - needsGot(false), needsPlt(false), hasDirectReloc(false) {} + folded(false), needsTocRestore(false), scriptDefined(false), + needsCopy(false), needsGot(false), needsPlt(false), needsTlsDesc(false), + needsTlsGd(false), needsTlsGdToIe(false), needsTlsLd(false), + needsGotDtprel(false), needsTlsIe(false), hasDirectReloc(false) {} public: // True if this symbol is in the Iplt sub-section of the Plt and the Igot @@ -269,6 +271,9 @@ class Symbol { // which are referenced by relocations when -r or --emit-relocs is given. uint8_t used : 1; + // True if defined relative to a section discarded by ICF. + uint8_t folded : 1; + // True if a call to this symbol needs to be followed by a restore of the // PPC64 toc pointer. uint8_t needsTocRestore : 1; @@ -284,6 +289,12 @@ class Symbol { // entries during postScanRelocations(); uint8_t needsGot : 1; uint8_t needsPlt : 1; + uint8_t needsTlsDesc : 1; + uint8_t needsTlsGd : 1; + uint8_t needsTlsGdToIe : 1; + uint8_t needsTlsLd : 1; + uint8_t needsGotDtprel : 1; + uint8_t needsTlsIe : 1; uint8_t hasDirectReloc : 1; // The partition whose dynamic symbol table contains this symbol's definition. @@ -430,7 +441,9 @@ class LazyObject : public Symbol { public: LazyObject(InputFile &file, StringRef name) : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL, - llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {} + llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) { + isUsedInRegularObj = false; + } static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } }; @@ -487,9 +500,9 @@ union SymbolUnion { }; // It is important to keep the size of SymbolUnion small for performance and -// memory usage reasons. 72 bytes is a soft limit based on the size of Defined +// memory usage reasons. 80 bytes is a soft limit based on the size of Defined // on a 64-bit system. 
-static_assert(sizeof(SymbolUnion) <= 72, "SymbolUnion too large"); +static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large"); template struct AssertSymbol { static_assert(std::is_trivially_destructible(), diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index a901c898b270..e480118f5ae9 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -391,7 +391,7 @@ Defined *EhFrameSection::isFdeLive(EhSectionPiece &fde, ArrayRef rels) { // FDEs for garbage-collected or merged-by-ICF sections, or sections in // another partition, are dead. if (auto *d = dyn_cast(&b)) - if (d->section && d->section->partition == partition) + if (!d->folded && d->section && d->section->partition == partition) return d; return nullptr; } @@ -763,18 +763,18 @@ size_t MipsGotSection::FileGot::getIndexedEntriesNum() const { } MipsGotSection::FileGot &MipsGotSection::getGot(InputFile &f) { - if (!f.mipsGotIndex.hasValue()) { + if (f.mipsGotIndex == uint32_t(-1)) { gots.emplace_back(); gots.back().file = &f; f.mipsGotIndex = gots.size() - 1; } - return gots[*f.mipsGotIndex]; + return gots[f.mipsGotIndex]; } uint64_t MipsGotSection::getPageEntryOffset(const InputFile *f, const Symbol &sym, int64_t addend) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; uint64_t index = 0; if (const OutputSection *outSec = sym.getOutputSection()) { uint64_t secAddr = getMipsPageAddr(outSec->addr); @@ -788,7 +788,7 @@ uint64_t MipsGotSection::getPageEntryOffset(const InputFile *f, uint64_t MipsGotSection::getSymEntryOffset(const InputFile *f, const Symbol &s, int64_t addend) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; Symbol *sym = const_cast(&s); if (sym->isTls()) return g.tls.lookup(sym) * config->wordsize; @@ -798,13 +798,13 @@ uint64_t MipsGotSection::getSymEntryOffset(const InputFile *f, const Symbol &s, } uint64_t MipsGotSection::getTlsIndexOffset(const InputFile *f) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; return g.dynTlsSymbols.lookup(nullptr) * config->wordsize; } uint64_t MipsGotSection::getGlobalDynOffset(const InputFile *f, const Symbol &s) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; Symbol *sym = const_cast(&s); return g.dynTlsSymbols.lookup(sym) * config->wordsize; } @@ -1007,14 +1007,14 @@ void MipsGotSection::build() { // thread-locals that have been marked as local through a linker script) if (!s->isPreemptible && !config->shared) continue; - mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, this, + mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *this, offset, *s); // However, we can skip writing the TLS offset reloc for non-preemptible // symbols since it is known even in shared libraries if (!s->isPreemptible) continue; offset += config->wordsize; - mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, this, offset, + mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *this, offset, *s); } } @@ -1027,7 +1027,7 @@ void MipsGotSection::build() { // Dynamic relocations for "global" entries. 
for (const std::pair &p : got.global) { uint64_t offset = p.second * config->wordsize; - mainPart->relaDyn->addSymbolReloc(target->relativeRel, this, offset, + mainPart->relaDyn->addSymbolReloc(target->relativeRel, *this, offset, *p.first); } if (!config->isPic) @@ -1061,10 +1061,9 @@ uint64_t MipsGotSection::getGp(const InputFile *f) const { // For files without related GOT or files refer a primary GOT // returns "common" _gp value. For secondary GOTs calculate // individual _gp values. - if (!f || !f->mipsGotIndex.hasValue() || *f->mipsGotIndex == 0) + if (!f || f->mipsGotIndex == uint32_t(-1) || f->mipsGotIndex == 0) return ElfSym::mipsGp->getVA(0); - return getVA() + gots[*f->mipsGotIndex].startIndex * config->wordsize + - 0x7ff0; + return getVA() + gots[f->mipsGotIndex].startIndex * config->wordsize + 0x7ff0; } void MipsGotSection::writeTo(uint8_t *buf) { @@ -1298,8 +1297,8 @@ DynamicSection::computeContents() { auto addInt = [&](int32_t tag, uint64_t val) { entries.emplace_back(tag, val); }; - auto addInSec = [&](int32_t tag, const InputSection *sec) { - entries.emplace_back(tag, sec->getVA()); + auto addInSec = [&](int32_t tag, const InputSection &sec) { + entries.emplace_back(tag, sec.getVA()); }; for (StringRef s : config->filterList) @@ -1375,7 +1374,7 @@ DynamicSection::computeContents() { if (part.relaDyn->isNeeded() || (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { - addInSec(part.relaDyn->dynamicTag, part.relaDyn); + addInSec(part.relaDyn->dynamicTag, *part.relaDyn); entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)); bool isRela = config->isRela; @@ -1393,7 +1392,7 @@ DynamicSection::computeContents() { } if (part.relrDyn && !part.relrDyn->relocs.empty()) { addInSec(config->useAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR, - part.relrDyn); + *part.relrDyn); addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ, part.relrDyn->getParent()->size); addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT, @@ -1406,14 +1405,14 @@ DynamicSection::computeContents() { // case, so here we always use relaPlt as marker for the beginning of // .rel[a].plt section. if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) { - addInSec(DT_JMPREL, in.relaPlt); + addInSec(DT_JMPREL, *in.relaPlt); entries.emplace_back(DT_PLTRELSZ, addPltRelSz()); switch (config->emachine) { case EM_MIPS: - addInSec(DT_MIPS_PLTGOT, in.gotPlt); + addInSec(DT_MIPS_PLTGOT, *in.gotPlt); break; case EM_SPARCV9: - addInSec(DT_PLTGOT, in.plt); + addInSec(DT_PLTGOT, *in.plt); break; case EM_AARCH64: if (llvm::find_if(in.relaPlt->relocs, [](const DynamicReloc &r) { @@ -1423,7 +1422,7 @@ DynamicSection::computeContents() { addInt(DT_AARCH64_VARIANT_PCS, 0); LLVM_FALLTHROUGH; default: - addInSec(DT_PLTGOT, in.gotPlt); + addInSec(DT_PLTGOT, *in.gotPlt); break; } addInt(DT_PLTREL, config->isRela ? 
DT_RELA : DT_REL); @@ -1436,16 +1435,16 @@ DynamicSection::computeContents() { addInt(DT_AARCH64_PAC_PLT, 0); } - addInSec(DT_SYMTAB, part.dynSymTab); + addInSec(DT_SYMTAB, *part.dynSymTab); addInt(DT_SYMENT, sizeof(Elf_Sym)); - addInSec(DT_STRTAB, part.dynStrTab); + addInSec(DT_STRTAB, *part.dynStrTab); addInt(DT_STRSZ, part.dynStrTab->getSize()); if (!config->zText) addInt(DT_TEXTREL, 0); if (part.gnuHashTab) - addInSec(DT_GNU_HASH, part.gnuHashTab); + addInSec(DT_GNU_HASH, *part.gnuHashTab); if (part.hashTab) - addInSec(DT_HASH, part.hashTab); + addInSec(DT_HASH, *part.hashTab); if (isMain) { if (Out::preinitArray) { @@ -1470,13 +1469,13 @@ DynamicSection::computeContents() { } if (part.verSym && part.verSym->isNeeded()) - addInSec(DT_VERSYM, part.verSym); + addInSec(DT_VERSYM, *part.verSym); if (part.verDef && part.verDef->isLive()) { - addInSec(DT_VERDEF, part.verDef); + addInSec(DT_VERDEF, *part.verDef); addInt(DT_VERDEFNUM, getVerDefNum()); } if (part.verNeed && part.verNeed->isNeeded()) { - addInSec(DT_VERNEED, part.verNeed); + addInSec(DT_VERNEED, *part.verNeed); unsigned needNum = 0; for (SharedFile *f : sharedFiles) if (!f->vernauxs.empty()) @@ -1495,10 +1494,10 @@ DynamicSection::computeContents() { addInt(DT_MIPS_GOTSYM, b->dynsymIndex); else addInt(DT_MIPS_GOTSYM, part.dynSymTab->getNumSymbols()); - addInSec(DT_PLTGOT, in.mipsGot); + addInSec(DT_PLTGOT, *in.mipsGot); if (in.mipsRldMap) { if (!config->pie) - addInSec(DT_MIPS_RLD_MAP, in.mipsRldMap); + addInSec(DT_MIPS_RLD_MAP, *in.mipsRldMap); // Store the offset to the .rld_map section // relative to the address of the tag. addInt(DT_MIPS_RLD_MAP_REL, @@ -1509,7 +1508,7 @@ DynamicSection::computeContents() { // DT_PPC_GOT indicates to glibc Secure PLT is used. If DT_PPC_GOT is absent, // glibc assumes the old-style BSS PLT layout which we don't support. if (config->emachine == EM_PPC) - addInSec(DT_PPC_GOT, in.got); + addInSec(DT_PPC_GOT, *in.got); // Glink dynamic tag is required by the V2 abi if the plt section isn't empty. if (config->emachine == EM_PPC64 && in.plt->isNeeded()) { @@ -1574,7 +1573,7 @@ RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type, dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag) {} void RelocationBaseSection::addSymbolReloc(RelType dynType, - InputSectionBase *isec, + InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, Optional addendRelType) { @@ -1583,7 +1582,7 @@ void RelocationBaseSection::addSymbolReloc(RelType dynType, } void RelocationBaseSection::addRelativeReloc( - RelType dynType, InputSectionBase *inputSec, uint64_t offsetInSec, + RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) { // This function should only be called for non-preemptible symbols or // RelExpr values that refer to an address inside the output file (e.g. the @@ -1596,11 +1595,11 @@ void RelocationBaseSection::addRelativeReloc( } void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( - RelType dynType, InputSectionBase *isec, uint64_t offsetInSec, Symbol &sym, + RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType) { // No need to write an addend to the section for preemptible symbols. 
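Like many hunks in this patch (discard, addSymbolReloc, addRelativeReloc, addInSec), the DynamicSection changes above turn pointer parameters that can never be null into references. A small sketch of the resulting helper shape, with invented names; the 6/5 tag values are the standard DT_SYMTAB/DT_STRTAB constants:

// [Editorial sketch - not part of the patch] A reference parameter moves the
// "must not be null" contract into the signature, so call sites pass *sec
// and the helper body needs no null check.
#include <cstdint>
#include <utility>
#include <vector>

struct Sec {
  uint64_t va;
  uint64_t getVA() const { return va; }
};

void buildDynamicEntries() {
  std::vector<std::pair<int32_t, uint64_t>> entries;
  auto addInSec = [&](int32_t tag, const Sec &sec) {
    entries.emplace_back(tag, sec.getVA());
  };
  Sec dynsym{0x1000}, dynstr{0x2000};
  addInSec(/*DT_SYMTAB=*/6, dynsym);
  addInSec(/*DT_STRTAB=*/5, dynstr);
}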
if (sym.isPreemptible) - addReloc({dynType, isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, + addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); else addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec, @@ -1608,16 +1607,16 @@ void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( } void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType, - InputSectionBase *inputSec, + InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType addendRelType) { // Write the addends to the relocated address if required. We skip // it if the written value would be zero. if (config->writeAddends && (expr != R_ADDEND || addend != 0)) - inputSec->relocations.push_back( + inputSec.relocations.push_back( {expr, addendRelType, offsetInSec, addend, &sym}); - addReloc({dynType, inputSec, offsetInSec, kind, sym, addend, expr}); + addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr}); } void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { @@ -3621,14 +3620,12 @@ void PPC32Got2Section::finalizeContents() { // .got2 . This function computes outSecOff of each .got2 to be used in // PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is // to collect input sections named ".got2". - uint32_t offset = 0; for (SectionCommand *cmd : getParent()->commands) if (auto *isd = dyn_cast(cmd)) { for (InputSection *isec : isd->sections) { - if (isec == this) - continue; - isec->file->ppc32Got2OutSecOff = offset; - offset += (uint32_t)isec->getSize(); + // isec->file may be nullptr for MergeSyntheticSection. + if (isec != this && isec->file) + isec->file->ppc32Got2 = isec; } } } diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 04a3b9b394af..c35e19cf2fb4 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -519,22 +519,22 @@ class RelocationBaseSection : public SyntheticSection { /// using relocations on the input section (e.g. MipsGotSection::writeTo()). void addReloc(const DynamicReloc &reloc); /// Add a dynamic relocation against \p sym with an optional addend. - void addSymbolReloc(RelType dynType, InputSectionBase *isec, + void addSymbolReloc(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend = 0, llvm::Optional addendRelType = llvm::None); /// Add a relative dynamic relocation that uses the target address of \p sym /// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend. - void addRelativeReloc(RelType dynType, InputSectionBase *isec, + void addRelativeReloc(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr); /// Add a dynamic relocation using the target address of \p sym as the addend /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym. 
void addAddendOnlyRelocIfNonPreemptible(RelType dynType, - InputSectionBase *isec, + InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType); void addReloc(DynamicReloc::Kind kind, RelType dynType, - InputSectionBase *inputSec, uint64_t offsetInSec, Symbol &sym, + InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType addendRelType); bool isNeeded() const override { return !relocs.empty(); } size_t getSize() const override { return relocs.size() * this->entsize; } diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index ffbc8d94a800..38de4db191f4 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -384,7 +384,7 @@ class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { if (Optional index = in.ppc64LongBranchTarget->addEntry(&dest, addend)) { mainPart->relaDyn->addRelativeReloc( - target->relativeRel, in.ppc64LongBranchTarget, *index * UINT64_C(8), + target->relativeRel, *in.ppc64LongBranchTarget, *index * UINT64_C(8), dest, addend + getPPC64GlobalEntryToLocalEntryOffset(dest.stOther), target->symbolicRel, R_ABS); } @@ -806,8 +806,9 @@ void elf::writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA, // The stub loads an address relative to r30 (.got2+Addend). Addend is // almost always 0x8000. The address of .got2 is different in another object // file, so a stub cannot be shared. - offset = gotPltVA - (in.ppc32Got2->getParent()->getVA() + - file->ppc32Got2OutSecOff + addend); + offset = gotPltVA - + (in.ppc32Got2->getParent()->getVA() + + (file->ppc32Got2 ? file->ppc32Got2->outSecOff : 0) + addend); } else { // The stub loads an address relative to _GLOBAL_OFFSET_TABLE_ (which is // currently the address of .got). diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 9390027487ed..497e56886b72 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -297,7 +297,7 @@ template void elf::createSyntheticSections() { } } - auto add = [](SyntheticSection *sec) { inputSections.push_back(sec); }; + auto add = [](SyntheticSection &sec) { inputSections.push_back(&sec); }; in.shStrTab = make(".shstrtab", false); @@ -311,7 +311,7 @@ template void elf::createSyntheticSections() { } in.bss = make(".bss", 0, 1); - add(in.bss); + add(*in.bss); // If there is a SECTIONS command and a .data.rel.ro section name use name // .data.rel.ro.bss so that we match in the .data.rel.ro output section. @@ -320,42 +320,42 @@ template void elf::createSyntheticSections() { script->hasSectionsCommand && findSection(".data.rel.ro", 0); in.bssRelRo = make(hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); - add(in.bssRelRo); + add(*in.bssRelRo); // Add MIPS-specific sections. if (config->emachine == EM_MIPS) { if (!config->shared && config->hasDynSymTab) { in.mipsRldMap = make(); - add(in.mipsRldMap); + add(*in.mipsRldMap); } if (auto *sec = MipsAbiFlagsSection::create()) - add(sec); + add(*sec); if (auto *sec = MipsOptionsSection::create()) - add(sec); + add(*sec); if (auto *sec = MipsReginfoSection::create()) - add(sec); + add(*sec); } StringRef relaDynName = config->isRela ? 
".rela.dyn" : ".rel.dyn"; for (Partition &part : partitions) { - auto add = [&](SyntheticSection *sec) { - sec->partition = part.getNumber(); - inputSections.push_back(sec); + auto add = [&](SyntheticSection &sec) { + sec.partition = part.getNumber(); + inputSections.push_back(&sec); }; if (!part.name.empty()) { part.elfHeader = make>(); part.elfHeader->name = part.name; - add(part.elfHeader); + add(*part.elfHeader); part.programHeaders = make>(); - add(part.programHeaders); + add(*part.programHeaders); } if (config->buildId != BuildIdKind::None) { part.buildId = make(); - add(part.buildId); + add(*part.buildId); } part.dynStrTab = make(".dynstr", true); @@ -368,53 +368,53 @@ template void elf::createSyntheticSections() { make>(relaDynName, config->zCombreloc); if (config->hasDynSymTab) { - add(part.dynSymTab); + add(*part.dynSymTab); part.verSym = make(); - add(part.verSym); + add(*part.verSym); if (!namedVersionDefs().empty()) { part.verDef = make(); - add(part.verDef); + add(*part.verDef); } part.verNeed = make>(); - add(part.verNeed); + add(*part.verNeed); if (config->gnuHash) { part.gnuHashTab = make(); - add(part.gnuHashTab); + add(*part.gnuHashTab); } if (config->sysvHash) { part.hashTab = make(); - add(part.hashTab); + add(*part.hashTab); } - add(part.dynamic); - add(part.dynStrTab); - add(part.relaDyn); + add(*part.dynamic); + add(*part.dynStrTab); + add(*part.relaDyn); } if (config->relrPackDynRelocs) { part.relrDyn = make>(); - add(part.relrDyn); + add(*part.relrDyn); } if (!config->relocatable) { if (config->ehFrameHdr) { part.ehFrameHdr = make(); - add(part.ehFrameHdr); + add(*part.ehFrameHdr); } part.ehFrame = make(); - add(part.ehFrame); + add(*part.ehFrame); } if (config->emachine == EM_ARM && !config->relocatable) { // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx // InputSections. part.armExidx = make(); - add(part.armExidx); + add(*part.armExidx); } } @@ -424,39 +424,39 @@ template void elf::createSyntheticSections() { // special handling (see createPhdrs() and combineEhSections()). in.partEnd = make(".part.end", config->maxPageSize, 1); in.partEnd->partition = 255; - add(in.partEnd); + add(*in.partEnd); in.partIndex = make(); addOptionalRegular("__part_index_begin", in.partIndex, 0); addOptionalRegular("__part_index_end", in.partIndex, in.partIndex->getSize()); - add(in.partIndex); + add(*in.partIndex); } // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (config->emachine == EM_MIPS) { in.mipsGot = make(); - add(in.mipsGot); + add(*in.mipsGot); } else { in.got = make(); - add(in.got); + add(*in.got); } if (config->emachine == EM_PPC) { in.ppc32Got2 = make(); - add(in.ppc32Got2); + add(*in.ppc32Got2); } if (config->emachine == EM_PPC64) { in.ppc64LongBranchTarget = make(); - add(in.ppc64LongBranchTarget); + add(*in.ppc64LongBranchTarget); } in.gotPlt = make(); - add(in.gotPlt); + add(*in.gotPlt); in.igotPlt = make(); - add(in.igotPlt); + add(*in.igotPlt); // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat // it as a relocation and ensure the referenced section is created. @@ -468,13 +468,13 @@ template void elf::createSyntheticSections() { } if (config->gdbIndex) - add(GdbIndexSection::create()); + add(*GdbIndexSection::create()); // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. in.relaPlt = make>( config->isRela ? 
".rela.plt" : ".rel.plt", /*sort=*/false); - add(in.relaPlt); + add(*in.relaPlt); // The relaIplt immediately follows .rel[a].dyn to ensure that the IRelative // relocations are processed last by the dynamic loader. We cannot place the @@ -485,22 +485,22 @@ template void elf::createSyntheticSections() { in.relaIplt = make>( config->androidPackDynRelocs ? in.relaPlt->name : relaDynName, /*sort=*/false); - add(in.relaIplt); + add(*in.relaIplt); if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { in.ibtPlt = make(); - add(in.ibtPlt); + add(*in.ibtPlt); } in.plt = config->emachine == EM_PPC ? make() : make(); - add(in.plt); + add(*in.plt); in.iplt = make(); - add(in.iplt); + add(*in.iplt); if (config->andFeatures) - add(make()); + add(*make()); // .note.GNU-stack is always added when we are creating a re-linkable // object file. Other linkers are using the presence of this marker @@ -508,15 +508,15 @@ template void elf::createSyntheticSections() { // is irrelevant these days. Stack area should always be non-executable // by default. So we emit this section unconditionally. if (config->relocatable) - add(make()); + add(*make()); if (in.symTab) - add(in.symTab); + add(*in.symTab); if (in.symTabShndx) - add(in.symTabShndx); - add(in.shStrTab); + add(*in.symTabShndx); + add(*in.shStrTab); if (in.strTab) - add(in.strTab); + add(*in.strTab); } // The main function of the writer. @@ -674,15 +674,11 @@ static bool shouldKeepInSymtab(const Defined &sym) { } static bool includeInSymtab(const Symbol &b) { - if (!b.isLocal() && !b.isUsedInRegularObj) - return false; - if (auto *d = dyn_cast(&b)) { // Always include absolute symbols. SectionBase *sec = d->section; if (!sec) return true; - sec = sec->repl; // Exclude symbols pointing to garbage-collected sections. if (isa(sec) && !sec->isLive()) @@ -712,11 +708,8 @@ template void Writer::copyLocalSymbols() { // No reason to keep local undefined symbol in symtab. if (!dr) continue; - if (!includeInSymtab(*b)) - continue; - if (!shouldKeepInSymtab(*dr)) - continue; - in.symTab->addSymbol(b); + if (includeInSymtab(*b) && shouldKeepInSymtab(*dr)) + in.symTab->addSymbol(b); } } } @@ -1240,7 +1233,7 @@ static void maybeShuffle(DenseMap &order) { if (config->shuffleSections.empty()) return; - std::vector matched, sections = inputSections; + SmallVector matched, sections = inputSections; matched.reserve(sections.size()); for (const auto &patAndSeed : config->shuffleSections) { matched.clear(); @@ -1308,7 +1301,7 @@ static DenseMap buildSectionOrder() { if (auto *d = dyn_cast(&sym)) { if (auto *sec = dyn_cast_or_null(d->section)) { - int &priority = sectionOrder[cast(sec->repl)]; + int &priority = sectionOrder[cast(sec)]; priority = std::min(priority, ent.priority); } } @@ -1475,13 +1468,6 @@ template void Writer::sortSections() { if (!os) continue; os->sortRank = getSectionRank(os); - - // We want to assign rude approximation values to outSecOff fields - // to know the relative order of the input sections. We use it for - // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder(). 
- uint64_t i = 0; - for (InputSection *sec : getInputSections(os)) - sec->outSecOff = i++; } if (!script->hasSectionsCommand) { @@ -1738,7 +1724,7 @@ static void fixSymbolsAfterShrinking() { if (!sec) return; - const InputSectionBase *inputSec = dyn_cast(sec->repl); + const InputSectionBase *inputSec = dyn_cast(sec); if (!inputSec || !inputSec->bytesDropped) return; @@ -1985,7 +1971,7 @@ template void Writer::finalizeSections() { // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. for (Symbol *sym : symtab->symbols()) { - if (!includeInSymtab(*sym)) + if (!sym->isUsedInRegularObj || !includeInSymtab(*sym)) continue; if (in.symTab) in.symTab->addSymbol(sym); diff --git a/lld/test/ELF/i386-tls-dynamic.s b/lld/test/ELF/i386-tls-dynamic.s index 07e894795cc0..d61ee5526bb1 100644 --- a/lld/test/ELF/i386-tls-dynamic.s +++ b/lld/test/ELF/i386-tls-dynamic.s @@ -66,35 +66,35 @@ addl tls1@gotntpoff(%ebx),%eax # CHECK: Relocations [ # CHECK: Section ({{.+}}) .rel.dyn { -# CHECK-NEXT: 0x2368 R_386_TLS_DTPMOD32 - +# CHECK-NEXT: 0x2370 R_386_TLS_DTPMOD32 - # CHECK-NEXT: 0x2358 R_386_TLS_DTPMOD32 tls0 # CHECK-NEXT: 0x235C R_386_TLS_DTPOFF32 tls0 -# CHECK-NEXT: 0x2370 R_386_TLS_TPOFF tls0 -# CHECK-NEXT: 0x2360 R_386_TLS_DTPMOD32 tls1 -# CHECK-NEXT: 0x2364 R_386_TLS_DTPOFF32 tls1 -# CHECK-NEXT: 0x2374 R_386_TLS_TPOFF tls1 +# CHECK-NEXT: 0x2360 R_386_TLS_TPOFF tls0 +# CHECK-NEXT: 0x2364 R_386_TLS_DTPMOD32 tls1 +# CHECK-NEXT: 0x2368 R_386_TLS_DTPOFF32 tls1 +# CHECK-NEXT: 0x236C R_386_TLS_TPOFF tls1 # CHECK-NEXT: } # DIS: Disassembly of section .text: # DIS-EMPTY: # DIS-NEXT: <_start>: ## General dynamic model: -## -4128 and -4120 are first and second GOT entries offsets. +## -4128 and -4116 are first and second GOT entries offsets. ## Each one is a pair of records. # DIS-NEXT: 1260: leal -4128(,%ebx), %eax # DIS-NEXT: 1267: calll 0x12d0 -# DIS-NEXT: 126c: leal -4120(,%ebx), %eax +# DIS-NEXT: 126c: leal -4116(,%ebx), %eax # DIS-NEXT: 1273: calll 0x12d0 ## Local dynamic model: ## -16 is a local module tls index offset. 
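A cross-check of the new i386-tls-dynamic.s expectations above: the @tlsgd/@gotntpoff operands are displacements from the GOT base, so their pairwise differences must match the differences between the .rel.dyn r_offsets listed in the same test. The tls0 TPOFF slot (0x2360) now sits between the two general-dynamic pairs, which is consistent with GOT entries being materialized per symbol in postScanRelocations() and explains the second @tlsgd operand moving from -4120 to -4116:

// [Editorial sketch - not part of the patch] Pure arithmetic checks of the
// values quoted in the updated CHECK/DIS lines; they compile to nothing.
static_assert(0x2364 - 0x2358 == -4116 - (-4128),
              "tls1's GD pair now starts 12 bytes after tls0's");
static_assert(0x2360 - 0x2358 == 8 && 0x2364 - 0x2360 == 4,
              "tls0's 4-byte TPOFF slot sits between the two GD pairs");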
-# DIS-NEXT: 1278: leal -4112(%ebx), %eax +# DIS-NEXT: 1278: leal -4104(%ebx), %eax # DIS-NEXT: 127e: calll 0x12d0 # DIS-NEXT: 1283: leal 8(%eax), %edx -# DIS-NEXT: 1289: leal -4112(%ebx), %eax +# DIS-NEXT: 1289: leal -4104(%ebx), %eax # DIS-NEXT: 128f: calll 0x12d0 # DIS-NEXT: 1294: leal 12(%eax), %edx ## Initial exec model: # DIS-NEXT: 129a: movl %gs:0, %eax -# DIS-NEXT: 12a0: addl -4104(%ebx), %eax +# DIS-NEXT: 12a0: addl -4120(%ebx), %eax # DIS-NEXT: 12a6: movl %gs:0, %eax -# DIS-NEXT: 12ac: addl -4100(%ebx), %eax +# DIS-NEXT: 12ac: addl -4108(%ebx), %eax diff --git a/lld/test/ELF/i386-tlsdesc-gd.s b/lld/test/ELF/i386-tlsdesc-gd.s index a2fc0f8f6645..132febed2feb 100644 --- a/lld/test/ELF/i386-tlsdesc-gd.s +++ b/lld/test/ELF/i386-tlsdesc-gd.s @@ -19,18 +19,18 @@ # RUN: llvm-objdump -h -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s # GD-REL: .rel.dyn { -# GD-REL-NEXT: 0x2250 R_386_TLS_DESC - +# GD-REL-NEXT: 0x2258 R_386_TLS_DESC - # GD-REL-NEXT: 0x2248 R_386_TLS_DESC a -# GD-REL-NEXT: 0x2258 R_386_TLS_DESC c +# GD-REL-NEXT: 0x2250 R_386_TLS_DESC c # GD-REL-NEXT: } # GD-REL: Hex dump of section '.got': -# GD-REL-NEXT: 0x00002248 00000000 00000000 00000000 0b000000 -# GD-REL-NEXT: 0x00002258 00000000 00000000 +# GD-REL-NEXT: 0x00002248 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x00002258 00000000 0b000000 # GD-RELA: .rela.dyn { -# GD-RELA-NEXT: 0x225C R_386_TLS_DESC - 0xB +# GD-RELA-NEXT: 0x2264 R_386_TLS_DESC - 0xB # GD-RELA-NEXT: 0x2254 R_386_TLS_DESC a 0x0 -# GD-RELA-NEXT: 0x2264 R_386_TLS_DESC c 0x0 +# GD-RELA-NEXT: 0x225C R_386_TLS_DESC c 0x0 # GD-RELA-NEXT: } # GD-RELA: Hex dump of section '.got': # GD-RELA-NEXT: 0x00002254 00000000 00000000 00000000 00000000 @@ -44,14 +44,14 @@ # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax -# &.rel.dyn[b]-.got.plt = 0x2250-0x2260 = -16 -# GD-NEXT: leal -16(%ebx), %eax +# &.rel.dyn[b]-.got.plt = 0x2258-0x2260 = -8 +# GD-NEXT: leal -8(%ebx), %eax # GD-NEXT: movl %edx, %ebx # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax -# &.rel.dyn[c]-.got.plt = 0x2258-0x2260 = -8 -# GD-NEXT: leal -8(%ebx), %eax +# &.rel.dyn[c]-.got.plt = 0x2250-0x2260 = -16 +# GD-NEXT: leal -16(%ebx), %eax # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax diff --git a/lld/test/ELF/lazy-arch-conflict.s b/lld/test/ELF/lazy-arch-conflict.s index 991476c5d6ec..b6b41ae1025a 100644 --- a/lld/test/ELF/lazy-arch-conflict.s +++ b/lld/test/ELF/lazy-arch-conflict.s @@ -4,4 +4,4 @@ # RUN: echo '.globl foo; foo:' | llvm-mc -filetype=obj -triple=i686-pc-linux - -o %t32.o # RUN: not ld.lld %t64.o --start-lib %t32.o --end-lib -o /dev/null 2>&1 | FileCheck %s -# CHECK: error: incompatible file: {{.*}}32.o +# CHECK: error: {{.*}}32.o is incompatible with {{.*}}64.o diff --git a/lld/test/ELF/linkerscript/ppc32-got2.s b/lld/test/ELF/linkerscript/ppc32-got2.s new file mode 100644 index 000000000000..1e232fa2c3c4 --- /dev/null +++ b/lld/test/ELF/linkerscript/ppc32-got2.s @@ -0,0 +1,59 @@ +# REQUIRES: ppc +## Test .got2 placed in a different output section. 
+ +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=powerpc %t/a.s -o %t/a.o +# RUN: llvm-mc -filetype=obj -triple=powerpc %t/b.s -o %t/b.o +# RUN: ld.lld -shared -T %t/t %t/a.o %t/b.o -o %t/a.so +# RUN: llvm-readobj -r %t/a.so | FileCheck --check-prefix=RELOC %s +# RUN: llvm-readelf -S %t/a.so | FileCheck --check-prefix=SEC %s + +# RELOC: .rela.plt { +# RELOC-NEXT: 0x1A4 R_PPC_JMP_SLOT f 0x0 +# RELOC-NEXT: } + +# SEC: .got PROGBITS 0000018c +# SEC-NEXT: .rodata PROGBITS 00000198 + +## .got2+0x8000-0xb0 = .rodata+4+0x8000-0xb0 = 0x198+4+0x8000-0xb0 = 65536*1-32532 +# CHECK: <_start>: +# CHECK-NEXT: bcl 20, 31, 0x +# CHECK-NEXT: b0: mflr 30 +# CHECK-NEXT: addis 30, 30, 1 +# CHECK-NEXT: addi 30, 30, -32532 +# CHECK-NEXT: bl {{.*}} <00008000.got2.plt_pic32.f> + +## &.got[2] - (.got2+0x8000) = &.got[2] - (.rodata+4+0x8000) = 0x1A4 - (0x198+4+0x8000) = -32760 +# CHECK: <00008000.got2.plt_pic32.f>: +# CHECK-NEXT: lwz 11, -32760(30) +# CHECK-NEXT: mtctr 11 +# CHECK-NEXT: bctr +# CHECK-NEXT: nop + +#--- a.s +.section .rodata.cst4,"aM",@progbits,4 +.long 1 + +.section .got2,"aw" +.long f + +.text +.globl _start, f, g +_start: + bcl 20,31,.L +.L: + mflr 30 + addis 30, 30, .got2+0x8000-.L@ha + addi 30, 30, .got2+0x8000-.L@l + bl f+0x8000@plt + +#--- b.s +.section .got2,"aw" +.globl f +f: + bl f+0x8000@plt + +#--- t +SECTIONS { + .rodata : { *(.rodata .rodata.*) *(.got2) } +} diff --git a/lld/test/ELF/ppc32-relocatable-got2.s b/lld/test/ELF/ppc32-relocatable-got2.s index 06ccb39b6a4a..85a0764ffe5a 100644 --- a/lld/test/ELF/ppc32-relocatable-got2.s +++ b/lld/test/ELF/ppc32-relocatable-got2.s @@ -3,10 +3,11 @@ ## If r_addend indicates .got2, adjust it by the local .got2's output section offset. # RUN: llvm-mc -filetype=obj -triple=powerpc %s -o %t.o -# RUN: ld.lld -r %t.o %t.o -o %t +# RUN: echo 'bl f+0x8000@plt' | llvm-mc -filetype=obj -triple=powerpc - -o %t2.o +# RUN: ld.lld -r %t.o %t.o %t2.o -o %t # RUN: llvm-readobj -r %t | FileCheck %s -# RUN: ld.lld -shared --emit-relocs %t.o %t.o -o %t.so +# RUN: ld.lld -shared --emit-relocs %t.o %t.o %t2.o -o %t.so # RUN: llvm-readobj -r %t.so | FileCheck %s # CHECK: .rela.adjust { @@ -23,6 +24,11 @@ # CHECK-NEXT: R_PPC_PLTREL24 foo 0x0 # CHECK-NEXT: R_PPC_PLTREL24 foo 0x0 # CHECK-NEXT: } +## %t2.o has an invalid relocation with r_addend=0x8000. Test we don't crash. +## The addend doesn't matter. +# CHECK-NEXT: .rela.text { +# CHECK-NEXT: R_PPC_PLTREL24 f 0x8000 +# CHECK-NEXT: } .section .got2,"aw" .long 0 diff --git a/lld/test/ELF/riscv-tls-ld.s b/lld/test/ELF/riscv-tls-ld.s index f47964ef3e26..bc9a601a74ec 100644 --- a/lld/test/ELF/riscv-tls-ld.s +++ b/lld/test/ELF/riscv-tls-ld.s @@ -35,26 +35,26 @@ ## a@dtprel = st_value(a)-0x800 = 0xfffff808 is a link-time constant. 
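## Annotation (not part of the original test): from the equation above,
## st_value(a) is 8, and 8 - 0x800 = -0x7f8, which is 0xfffff808 when printed
## as an unsigned 32-bit value; the 0x800 is the RISC-V DTP offset bias that
## DTPREL values carry.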
# LD32-REL: .rela.dyn { -# LD32-REL-NEXT: 0x22B4 -# LD32-REL-NEXT: 0x22AC R_RISCV_TLS_DTPMOD32 - 0x0 +# LD32-REL-NEXT: 0x22AC +# LD32-REL-NEXT: 0x22B0 R_RISCV_TLS_DTPMOD32 - 0x0 # LD32-REL-NEXT: } # LD32-GOT: section '.got': -# LD32-GOT-NEXT: 0x000022a8 30220000 00000000 00f8ffff 00000000 +# LD32-GOT-NEXT: 0x000022a8 30220000 00000000 00000000 00f8ffff # LD64-REL: .rela.dyn { -# LD64-REL-NEXT: 0x2458 -# LD64-REL-NEXT: 0x2448 R_RISCV_TLS_DTPMOD64 - 0x0 +# LD64-REL-NEXT: 0x2448 +# LD64-REL-NEXT: 0x2450 R_RISCV_TLS_DTPMOD64 - 0x0 # LD64-REL-NEXT: } # LD64-GOT: section '.got': # LD64-GOT-NEXT: 0x00002440 50230000 00000000 00000000 00000000 -# LD64-GOT-NEXT: 0x00002450 00f8ffff ffffffff 00000000 00000000 +# LD64-GOT-NEXT: 0x00002450 00000000 00000000 00f8ffff ffffffff -## rv32: &DTPMOD(a) - . = 0x22ac - 0x11d8 = 4096*1+212 -## rv64: &DTPMOD(a) - . = 0x2448 - 0x12f8 = 4096*1+336 +## rv32: &DTPMOD(a) - . = 0x22b0 - 0x11d8 = 4096*1+216 +## rv64: &DTPMOD(a) - . = 0x2450 - 0x12f8 = 4096*1+344 # LD32: 11d8: auipc a0, 1 -# LD32-NEXT: addi a0, a0, 212 +# LD32-NEXT: addi a0, a0, 216 # LD64: 12f8: auipc a0, 1 -# LD64-NEXT: addi a0, a0, 336 +# LD64-NEXT: addi a0, a0, 344 # LD-NEXT: auipc ra, 0 # LD-NEXT: jalr 64(ra) @@ -63,18 +63,18 @@ ## a is local - its DTPMOD/DTPREL slots are link-time constants. ## a@dtpmod = 1 (main module) # LE32-GOT: section '.got': -# LE32-GOT-NEXT: 0x00012134 00000000 01000000 00f8ffff 34210100 +# LE32-GOT-NEXT: 0x00012134 00000000 34210100 01000000 00f8ffff # LE64-GOT: section '.got': -# LE64-GOT-NEXT: 0x000121e8 00000000 00000000 01000000 00000000 -# LE64-GOT-NEXT: 0x000121f8 00f8ffff ffffffff e8210100 00000000 +# LE64-GOT-NEXT: 0x000121e8 00000000 00000000 e8210100 00000000 +# LE64-GOT-NEXT: 0x000121f8 01000000 00000000 00f8ffff ffffffff -## rv32: DTPMOD(.LANCHOR0) - . = 0x12138 - 0x11114 = 4096*1+36 -## rv64: DTPMOD(.LANCHOR0) - . = 0x121f0 - 0x111c8 = 4096*1+40 +## rv32: DTPMOD(.LANCHOR0) - . = 0x1213c - 0x11114 = 4096*1+40 +## rv64: DTPMOD(.LANCHOR0) - . = 0x121f8 - 0x111c8 = 4096*1+48 # LE32: 11114: auipc a0, 1 -# LE32-NEXT: addi a0, a0, 36 +# LE32-NEXT: addi a0, a0, 40 # LE64: 111c8: auipc a0, 1 -# LE64-NEXT: addi a0, a0, 40 +# LE64-NEXT: addi a0, a0, 48 # LE-NEXT: auipc ra, 0 # LE-NEXT: jalr 24(ra) diff --git a/lld/test/ELF/x86-64-tls-ie.s b/lld/test/ELF/x86-64-tls-ie.s index e5510f84d5f3..8190c569f0fe 100644 --- a/lld/test/ELF/x86-64-tls-ie.s +++ b/lld/test/ELF/x86-64-tls-ie.s @@ -4,7 +4,7 @@ // RUN: ld.lld -shared %t2.o -soname=so -o %t2.so // RUN: ld.lld -e main %t1.o %t2.so -o %t3 // RUN: llvm-readobj -S -r %t3 | FileCheck %s -// RUN: llvm-objdump -d %t3 | FileCheck --check-prefix=DISASM %s +// RUN: llvm-objdump -d --no-show-raw-insn %t3 | FileCheck --check-prefix=DISASM %s // CHECK: Section { // CHECK: Index: 9 @@ -15,7 +15,7 @@ // CHECK-NEXT: SHF_WRITE // CHECK-NEXT: ] // CHECK-NEXT: Address: [[ADDR:.*]] -// CHECK-NEXT: Offset: 0x3B0 +// CHECK-NEXT: Offset: 0x3F0 // CHECK-NEXT: Size: 16 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 @@ -26,23 +26,27 @@ // CHECK: Relocations [ // CHECK-NEXT: Section (5) .rela.dyn { // CHECK-NEXT: [[ADDR]] R_X86_64_TPOFF64 tls1 0x0 -// CHECK-NEXT: 0x2023B8 R_X86_64_TPOFF64 tls0 0x0 +// CHECK-NEXT: 0x2023F8 R_X86_64_TPOFF64 tls0 0x0 // CHECK-NEXT: } // CHECK-NEXT: ] -// 0x2012d0 + 4313 + 7 = 0x2023B0 -// 0x2012dA + 4311 + 7 = 0x2023B8 -// 0x2012e4 + 4301 + 7 = 0x2023B8 +/// 0x2023F0 - 0x201307 = 4329 +/// 0x2023F8 - 0x201311 = 4327 +/// 0x2023F8 - 0x20131b = 4317 // DISASM: Disassembly of section .text: // DISASM-EMPTY: // DISASM-NEXT:
: -// DISASM-NEXT: 2012d0: {{.*}} movq 4313(%rip), %rax -// DISASM-NEXT: 2012d7: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012da: {{.*}} movq 4311(%rip), %rax -// DISASM-NEXT: 2012e1: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012e4: {{.*}} movq 4301(%rip), %rax -// DISASM-NEXT: 2012eb: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012ee: {{.*}} retq +// DISASM-NEXT: movq 4329(%rip), %rax +// DISASM-NEXT: 201307: movl %fs:(%rax), %eax +// DISASM-NEXT: movq 4327(%rip), %rax +// DISASM-NEXT: 201311: movl %fs:(%rax), %eax +// DISASM-NEXT: movq 4317(%rip), %rax +// DISASM-NEXT: 20131b: movl %fs:(%rax), %eax + +/// 0x2023F0 - 0x20132e = 4290 +// DISASM-NEXT: movq %fs:0, %rax +// DISASM-NEXT: addq 4290(%rip), %rax +// DISASM-NEXT: 20132e: retq .section .tdata,"awT",@progbits @@ -57,4 +61,11 @@ main: movl %fs:0(%rax), %eax movq tls0@GOTTPOFF(%rip), %rax movl %fs:0(%rax), %eax + +## Relaxed to TLS IE. Share the GOT entry with GOTTPOFF. + .byte 0x66 + leaq tls1@tlsgd(%rip), %rdi + .value 0x6666 + rex64 + call __tls_get_addr@PLT ret diff --git a/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s b/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s new file mode 100644 index 000000000000..f6bfe5bbba1c --- /dev/null +++ b/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s @@ -0,0 +1,25 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readobj -r %t.so | FileCheck %s --check-prefix=RELA + +## FIXME Both TLSDESC and DTPMOD64/DTPOFF64 should be present. +# RELA: .rela.dyn { +# RELA-NEXT: 0x2430 R_X86_64_TLSDESC a 0x0 +# RELA-NEXT: } + +leaq a@tlsdesc(%rip), %rax +call *a@tlscall(%rax) +movl %fs:(%rax), %eax + +.byte 0x66 +leaq a@tlsgd(%rip), %rdi +.word 0x6666 +rex64 +call __tls_get_addr@PLT + +.section .tbss +.globl a +.zero 8 +a: +.zero 4 diff --git a/lld/test/ELF/x86-64-tlsdesc-gd.s b/lld/test/ELF/x86-64-tlsdesc-gd.s index f0cdf08040f3..436216e7d3d5 100644 --- a/lld/test/ELF/x86-64-tlsdesc-gd.s +++ b/lld/test/ELF/x86-64-tlsdesc-gd.s @@ -19,9 +19,9 @@ # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s # GD-RELA: .rela.dyn { -# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC - 0xB +# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC - 0xB # GD-RELA-NEXT: 0x23B0 R_X86_64_TLSDESC a 0x0 -# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC c 0x0 +# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC c 0x0 # GD-RELA-NEXT: } # GD-RELA: Hex dump of section '.got': # GD-RELA-NEXT: 0x000023b0 00000000 00000000 00000000 00000000 @@ -29,28 +29,28 @@ # GD-RELA-NEXT: 0x000023d0 00000000 00000000 00000000 00000000 # GD-REL: .rel.dyn { -# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC - +# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC - # GD-REL-NEXT: 0x2398 R_X86_64_TLSDESC a -# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC c +# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC c # GD-REL-NEXT: } # GD-REL: Hex dump of section '.got': # GD-REL-NEXT: 0x00002398 00000000 00000000 00000000 00000000 -# GD-REL-NEXT: 0x000023a8 00000000 00000000 0b000000 00000000 -# GD-REL-NEXT: 0x000023b8 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x000023a8 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x000023b8 00000000 00000000 0b000000 00000000 ## &.rela.dyn[a]-pc = 0x23B0-0x12e7 = 4297 # GD: leaq 4297(%rip), %rax # GD-NEXT: 12e7: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax -## &.rela.dyn[b]-pc = 0x23C0-0x12f3 = 4301 -# GD-NEXT: leaq 4301(%rip), %rcx +## &.rela.dyn[b]-pc = 0x23D0-0x12f3 = 4317 +# GD-NEXT: leaq 4317(%rip), %rcx # GD-NEXT: 12f3: movq %rcx, %rax # GD-NEXT: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax -## 
&.rela.dyn[c]-pc = 0x23D0-0x1302 = 4302 -# GD-NEXT: leaq 4302(%rip), %r15 +## &.rela.dyn[c]-pc = 0x23C0-0x1302 = 4286 +# GD-NEXT: leaq 4286(%rip), %r15 # GD-NEXT: 1302: movq %r15, %rax # GD-NEXT: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test index 35de693e9ea7..9cb1cc31e515 100644 --- a/lld/test/wasm/debuginfo.test +++ b/lld/test/wasm/debuginfo.test @@ -44,23 +44,9 @@ CHECK-NEXT: DW_AT_producer ("clang version 7.0.0 (trunk {{.*}})") CHECK-NEXT: DW_AT_language (DW_LANG_C99) CHECK-NEXT: DW_AT_name ("hi_foo.c") -CHECK: DW_TAG_subprogram -CHECK-NEXT: DW_AT_low_pc -CHECK-NEXT: DW_AT_high_pc -CHECK-NEXT: DW_AT_frame_base -CHECK-NEXT: DW_AT_name ("foo") -CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") -CHECK-NEXT: DW_AT_decl_line (3) - -CHECK: DW_TAG_formal_parameter -CHECK-NEXT: DW_AT_location (DW_OP_WASM_location 0x0 0x0, DW_OP_stack_value) -CHECK-NEXT: DW_AT_name ("p") -CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") -CHECK-NEXT: DW_AT_decl_line (3) - CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("y") -CHECK-NEXT: DW_AT_type (0x000000d4 "int[2]") +CHECK-NEXT: DW_AT_type (0x000000ac "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (1) @@ -82,9 +68,23 @@ CHECK-NEXT: DW_AT_encoding (DW_ATE_unsigned) CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("z") -CHECK-NEXT: DW_AT_type (0x000000d4 "int[2]") +CHECK-NEXT: DW_AT_type (0x000000ac "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (8) CHECK-NEXT: DW_AT_location (DW_OP_addr 0xffffffff) +CHECK: DW_TAG_subprogram +CHECK-NEXT: DW_AT_low_pc +CHECK-NEXT: DW_AT_high_pc +CHECK-NEXT: DW_AT_frame_base +CHECK-NEXT: DW_AT_name ("foo") +CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") +CHECK-NEXT: DW_AT_decl_line (3) + +CHECK: DW_TAG_formal_parameter +CHECK-NEXT: DW_AT_location (DW_OP_WASM_location 0x0 0x0, DW_OP_stack_value) +CHECK-NEXT: DW_AT_name ("p") +CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") +CHECK-NEXT: DW_AT_decl_line (3) + diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 5c2dac3f51f6..79d451965ed4 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.13.4) +include(GNUInstallDirs) + # Add path for custom modules. 
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} diff --git a/lldb/bindings/python/python-extensions.swig b/lldb/bindings/python/python-extensions.swig index b98b0d458f0c..398df708830a 100644 --- a/lldb/bindings/python/python-extensions.swig +++ b/lldb/bindings/python/python-extensions.swig @@ -563,6 +563,7 @@ def is_numeric_type(basic_type): if basic_type == eBasicTypeWChar: return (True,False) if basic_type == eBasicTypeSignedWChar: return (True,True) if basic_type == eBasicTypeUnsignedWChar: return (True,False) + if basic_type == eBasicTypeChar8: return (True,False) if basic_type == eBasicTypeChar16: return (True,False) if basic_type == eBasicTypeChar32: return (True,False) if basic_type == eBasicTypeShort: return (True,True) diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index 25c2f44106bc..7d639e664f53 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -5,38 +5,11 @@ PyObject *SBTypeToSWIGWrapper(lldb::SBEvent &event_sb) { return SWIG_NewPointerObj(&event_sb, SWIGTYPE_p_lldb__SBEvent, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBWatchpoint &watchpoint_sb) { - return SWIG_NewPointerObj(&watchpoint_sb, SWIGTYPE_p_lldb__SBWatchpoint, 0); -} - -PyObject * -SBTypeToSWIGWrapper(lldb::SBBreakpointLocation &breakpoint_location_sb) { - return SWIG_NewPointerObj(&breakpoint_location_sb, - SWIGTYPE_p_lldb__SBBreakpointLocation, 0); -} - PyObject *SBTypeToSWIGWrapper(lldb::SBCommandReturnObject &cmd_ret_obj_sb) { return SWIG_NewPointerObj(&cmd_ret_obj_sb, SWIGTYPE_p_lldb__SBCommandReturnObject, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBExecutionContext &ctx_sb) { - return SWIG_NewPointerObj(&ctx_sb, SWIGTYPE_p_lldb__SBExecutionContext, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBTypeSummaryOptions &summary_options_sb) { - return SWIG_NewPointerObj(&summary_options_sb, - SWIGTYPE_p_lldb__SBTypeSummaryOptions, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBSymbolContext &sym_ctx_sb) { - return SWIG_NewPointerObj(&sym_ctx_sb, SWIGTYPE_p_lldb__SBSymbolContext, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBStream &stream_sb) { - return SWIG_NewPointerObj(&stream_sb, SWIGTYPE_p_lldb__SBStream, 0); -} - PythonObject ToSWIGHelper(void *obj, swig_type_info *info) { return {PyRefType::Owned, SWIG_NewPointerObj(obj, info, SWIG_POINTER_OWN)}; } @@ -69,6 +42,10 @@ PythonObject ToSWIGWrapper(lldb::BreakpointSP breakpoint_sp) { SWIGTYPE_p_lldb__SBBreakpoint); } +PythonObject ToSWIGWrapper(std::unique_ptr stream_sb) { + return ToSWIGHelper(stream_sb.release(), SWIGTYPE_p_lldb__SBStream); +} + PythonObject ToSWIGWrapper(std::unique_ptr data_sb) { return ToSWIGHelper(data_sb.release(), SWIGTYPE_p_lldb__SBStructuredData); } @@ -92,5 +69,30 @@ PythonObject ToSWIGWrapper(lldb::DebuggerSP debugger_sp) { SWIGTYPE_p_lldb__SBDebugger); } +PythonObject ToSWIGWrapper(lldb::WatchpointSP watchpoint_sp) { + return ToSWIGHelper(new lldb::SBWatchpoint(std::move(watchpoint_sp)), + SWIGTYPE_p_lldb__SBWatchpoint); +} + +PythonObject ToSWIGWrapper(lldb::BreakpointLocationSP bp_loc_sp) { + return ToSWIGHelper(new lldb::SBBreakpointLocation(std::move(bp_loc_sp)), + SWIGTYPE_p_lldb__SBBreakpointLocation); +} + +PythonObject ToSWIGWrapper(lldb::ExecutionContextRefSP ctx_sp) { + return ToSWIGHelper(new lldb::SBExecutionContext(std::move(ctx_sp)), + SWIGTYPE_p_lldb__SBExecutionContext); +} + +PythonObject ToSWIGWrapper(const TypeSummaryOptions &summary_options) { + return ToSWIGHelper(new 
lldb::SBTypeSummaryOptions(summary_options), + SWIGTYPE_p_lldb__SBTypeSummaryOptions); +} + +PythonObject ToSWIGWrapper(const SymbolContext &sym_ctx) { + return ToSWIGHelper(new lldb::SBSymbolContext(sym_ctx), + SWIGTYPE_p_lldb__SBSymbolContext); +} + } // namespace python } // namespace lldb_private diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 5f85af2ba6ce..a2c1f756a0a2 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -38,14 +38,12 @@ llvm::Expected lldb_private::LLDBSwigPythonBreakpointCallbackFunction( return arg_info.takeError(); PythonObject frame_arg = ToSWIGWrapper(frame_sp); - PythonObject bp_loc_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_bp_loc)); + PythonObject bp_loc_arg = ToSWIGWrapper(bp_loc_sp); - auto result = [&]() -> Expected { - // If the called function doesn't take extra_args, drop them here: - if (max_positional_args < 4) - return pfunc.Call(frame_arg, bp_loc_arg, dict); - return pfunc.Call(frame_arg, bp_loc_arg, ToSWIGWrapper(args_impl), dict); - }(); + auto result = + max_positional_args < 4 + ? pfunc.Call(frame_arg, bp_loc_arg, dict) + : pfunc.Call(frame_arg, bp_loc_arg, ToSWIGWrapper(args_impl), dict); if (!result) return result.takeError(); @@ -70,7 +68,6 @@ llvm::Expected lldb_private::LLDBSwigPythonBreakpointCallbackFunction( bool lldb_private::LLDBSwigPythonWatchpointCallbackFunction( const char *python_function_name, const char *session_dictionary_name, const lldb::StackFrameSP &frame_sp, const lldb::WatchpointSP &wp_sp) { - lldb::SBWatchpoint sb_wp(wp_sp); bool stop_at_watchpoint = true; @@ -84,9 +81,8 @@ bool lldb_private::LLDBSwigPythonWatchpointCallbackFunction( if (!pfunc.IsAllocated()) return stop_at_watchpoint; - PythonObject frame_arg = ToSWIGWrapper(frame_sp); - PythonObject wp_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_wp)); - PythonObject result = pfunc(frame_arg, wp_arg, dict); + PythonObject result = + pfunc(ToSWIGWrapper(frame_sp), ToSWIGWrapper(wp_sp), dict); if (result.get() == Py_False) stop_at_watchpoint = false; @@ -98,7 +94,6 @@ bool lldb_private::LLDBSwigPythonCallTypeScript( const char *python_function_name, const void *session_dictionary, const lldb::ValueObjectSP &valobj_sp, void **pyfunct_wrapper, const lldb::TypeSummaryOptionsSP &options_sp, std::string &retval) { - lldb::SBTypeSummaryOptions sb_options(options_sp.get()); retval.clear(); @@ -146,12 +141,11 @@ bool lldb_private::LLDBSwigPythonCallTypeScript( } PythonObject value_arg = ToSWIGWrapper(valobj_sp); - PythonObject options_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_options)); if (argc.get().max_positional_args < 3) result = pfunc(value_arg, dict); else - result = pfunc(value_arg, dict, options_arg); + result = pfunc(value_arg, dict, ToSWIGWrapper(*options_sp)); retval = result.Str().GetString().str(); @@ -452,13 +446,7 @@ unsigned int lldb_private::LLDBSwigPythonCallBreakpointResolver( if (!pfunc.IsAllocated()) return 0; - PythonObject result; - if (sym_ctx != nullptr) { - lldb::SBSymbolContext sb_sym_ctx(sym_ctx); - PythonObject sym_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_sym_ctx)); - result = pfunc(sym_ctx_arg); - } else - result = pfunc(); + PythonObject result = sym_ctx ? 
pfunc(ToSWIGWrapper(*sym_ctx)) : pfunc(); if (PyErr_Occurred()) { PyErr_Print(); @@ -560,12 +548,11 @@ bool lldb_private::LLDBSwigPythonStopHookCallHandleStop( if (!pfunc.IsAllocated()) return true; - PythonObject result; - lldb::SBExecutionContext sb_exc_ctx(exc_ctx_sp); - PythonObject exc_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_exc_ctx)); - lldb::SBStream sb_stream; - PythonObject sb_stream_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_stream)); - result = pfunc(exc_ctx_arg, sb_stream_arg); + auto *sb_stream = new lldb::SBStream(); + PythonObject sb_stream_arg = + ToSWIGWrapper(std::unique_ptr(sb_stream)); + PythonObject result = + pfunc(ToSWIGWrapper(std::move(exc_ctx_sp)), sb_stream_arg); if (PyErr_Occurred()) { stream->PutCString("Python error occurred handling stop-hook."); @@ -577,7 +564,7 @@ bool lldb_private::LLDBSwigPythonStopHookCallHandleStop( // Now add the result to the output stream. SBStream only // makes an internally help StreamString which I can't interpose, so I // have to copy it over here. - stream->PutCString(sb_stream.GetData()); + stream->PutCString(sb_stream->GetData()); if (result.get() == Py_False) return false; @@ -809,7 +796,6 @@ bool lldb_private::LLDBSwigPythonCallCommand( lldb_private::CommandReturnObject &cmd_retobj, lldb::ExecutionContextRefSP exe_ctx_ref_sp) { lldb::SBCommandReturnObject cmd_retobj_sb(cmd_retobj); - lldb::SBExecutionContext exe_ctx_sb(exe_ctx_ref_sp); PyErr_Cleaner py_err_cleaner(true); auto dict = PythonModule::MainModule().ResolveName( @@ -826,14 +812,14 @@ bool lldb_private::LLDBSwigPythonCallCommand( return false; } PythonObject debugger_arg = ToSWIGWrapper(std::move(debugger)); - PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb)); PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(cmd_retobj_sb)); if (argc.get().max_positional_args < 5u) pfunc(debugger_arg, PythonString(args), cmd_retobj_arg, dict); else - pfunc(debugger_arg, PythonString(args), exe_ctx_arg, cmd_retobj_arg, dict); + pfunc(debugger_arg, PythonString(args), + ToSWIGWrapper(std::move(exe_ctx_ref_sp)), cmd_retobj_arg, dict); return true; } @@ -843,7 +829,6 @@ bool lldb_private::LLDBSwigPythonCallCommandObject( lldb_private::CommandReturnObject &cmd_retobj, lldb::ExecutionContextRefSP exe_ctx_ref_sp) { lldb::SBCommandReturnObject cmd_retobj_sb(cmd_retobj); - lldb::SBExecutionContext exe_ctx_sb(exe_ctx_ref_sp); PyErr_Cleaner py_err_cleaner(true); @@ -853,12 +838,11 @@ bool lldb_private::LLDBSwigPythonCallCommandObject( if (!pfunc.IsAllocated()) return false; - PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb)); PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(cmd_retobj_sb)); - pfunc(ToSWIGWrapper(std::move(debugger)), PythonString(args), exe_ctx_arg, - cmd_retobj_arg); + pfunc(ToSWIGWrapper(std::move(debugger)), PythonString(args), + ToSWIGWrapper(exe_ctx_ref_sp), cmd_retobj_arg); return true; } diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index 8be214a8509a..3291a7c808e1 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -1,3 +1,5 @@ +include(GNUInstallDirs) + function(lldb_tablegen) # Syntax: # lldb_tablegen output-file [tablegen-arg ...] 
SOURCE source-file @@ -113,7 +115,7 @@ function(add_lldb_library name) endif() # RUNTIME is relevant for DLL platforms, FRAMEWORK for macOS install(TARGETS ${name} COMPONENT ${name} - RUNTIME DESTINATION bin + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" LIBRARY DESTINATION ${install_dest} ARCHIVE DESTINATION ${install_dest} FRAMEWORK DESTINATION ${install_dest}) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 01c501a7f717..e12e548ad0c4 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -231,7 +231,7 @@ include_directories(BEFORE if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/ COMPONENT lldb-headers - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" FILES_MATCHING PATTERN "*.h" PATTERN ".cmake" EXCLUDE @@ -239,7 +239,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ COMPONENT lldb-headers - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" FILES_MATCHING PATTERN "*.h" PATTERN ".cmake" EXCLUDE diff --git a/lldb/docs/lldb-gdb-remote.txt b/lldb/docs/lldb-gdb-remote.txt index 17588021e313..980dc77c86f5 100644 --- a/lldb/docs/lldb-gdb-remote.txt +++ b/lldb/docs/lldb-gdb-remote.txt @@ -1003,6 +1003,9 @@ vendor: is a string that represents the vendor (apple) endian: is one of "little", "big", or "pdp" ptrsize: is a number that represents how big pointers are in bytes +main-binary-uuid: is the UUID of a firmware type binary that the gdb stub knows about +main-binary-address: is the load address of the firmware type binary +main-binary-slide: is the slide of the firmware type binary, if address isn't known //---------------------------------------------------------------------- // "qShlibInfoAddr" diff --git a/lldb/docs/python_api_enums.rst b/lldb/docs/python_api_enums.rst index 58100b2aa9a2..1c363381a11b 100644 --- a/lldb/docs/python_api_enums.rst +++ b/lldb/docs/python_api_enums.rst @@ -1017,6 +1017,7 @@ BasicType .. py:data:: eBasicTypeWChar .. py:data:: eBasicTypeSignedWChar .. py:data:: eBasicTypeUnsignedWChar +.. py:data:: eBasicTypeChar8 .. py:data:: eBasicTypeChar16 .. py:data:: eBasicTypeChar32 .. 
py:data:: eBasicTypeShort diff --git a/lldb/include/lldb/API/SBSymbolContext.h b/lldb/include/lldb/API/SBSymbolContext.h index 16ad29ea8730..b4c5921d81a9 100644 --- a/lldb/include/lldb/API/SBSymbolContext.h +++ b/lldb/include/lldb/API/SBSymbolContext.h @@ -25,7 +25,7 @@ class LLDB_API SBSymbolContext { SBSymbolContext(const lldb::SBSymbolContext &rhs); - SBSymbolContext(const lldb_private::SymbolContext *sc_ptr); + SBSymbolContext(const lldb_private::SymbolContext &sc_ptr); ~SBSymbolContext(); @@ -72,8 +72,6 @@ class LLDB_API SBSymbolContext { lldb_private::SymbolContext *get() const; - void SetSymbolContext(const lldb_private::SymbolContext *sc_ptr); - private: std::unique_ptr m_opaque_up; }; diff --git a/lldb/include/lldb/API/SBTypeSummary.h b/lldb/include/lldb/API/SBTypeSummary.h index 929bfb6124b2..e9963682f7ab 100644 --- a/lldb/include/lldb/API/SBTypeSummary.h +++ b/lldb/include/lldb/API/SBTypeSummary.h @@ -19,7 +19,7 @@ class LLDB_API SBTypeSummaryOptions { SBTypeSummaryOptions(const lldb::SBTypeSummaryOptions &rhs); - SBTypeSummaryOptions(const lldb_private::TypeSummaryOptions *lldb_object_ptr); + SBTypeSummaryOptions(const lldb_private::TypeSummaryOptions &lldb_object); ~SBTypeSummaryOptions(); @@ -48,8 +48,6 @@ class LLDB_API SBTypeSummaryOptions { const lldb_private::TypeSummaryOptions &ref() const; - void SetOptions(const lldb_private::TypeSummaryOptions *lldb_object_ptr); - private: std::unique_ptr m_opaque_up; }; diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 5f1cbc65b320..192149f05436 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -1000,7 +1000,7 @@ class ValueObject { void SetPreferredDisplayLanguageIfNeeded(lldb::LanguageType); protected: - virtual void DoUpdateChildrenAddressType(ValueObject &valobj) { return; }; + virtual void DoUpdateChildrenAddressType(ValueObject &valobj){}; private: virtual CompilerType MaybeCalculateCompleteType(); diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index bfc5d382dbf9..8bf047cd4514 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -527,11 +527,15 @@ class ObjectFile : public std::enable_shared_from_this, /// binary is exactly which removes ambiguity when there are multiple /// binaries present in the captured memory pages. /// - /// \param[out] address - /// If the address of the binary is specified, this will be set. - /// This is an address is the virtual address space of the core file - /// memory segments; it is not an offset into the object file. - /// If no address is available, will be set to LLDB_INVALID_ADDRESS. + /// \param[out] value + /// The address or offset (slide) where the binary is loaded in memory. + /// LLDB_INVALID_ADDRESS for unspecified. If an offset is given, + /// this offset should be added to the binary's file address to get + /// the load address. + /// + /// \param[out] value_is_offset + /// Specifies if \b value is a load address, or an offset to calculate + /// the load address. /// /// \param[out] uuid /// If the uuid of the binary is specified, this will be set. @@ -543,9 +547,11 @@ class ObjectFile : public std::enable_shared_from_this, /// /// \return /// Returns true if either address or uuid has been set. 
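  /// Illustrative call site (annotation, not part of this patch; names are
  /// hypothetical): a core-file consumer could combine the two out-params as
  ///   addr_t value; bool is_offset; UUID uuid; ObjectFile::BinaryType type;
  ///   if (objfile->GetCorefileMainBinaryInfo(value, is_offset, uuid, type))
  ///     load_addr = is_offset ? binary_file_addr + value : value;
  /// i.e. an offset is applied on top of the binary's file address, while a
  /// plain address is used as-is.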
- virtual bool GetCorefileMainBinaryInfo(lldb::addr_t &address, UUID &uuid, + virtual bool GetCorefileMainBinaryInfo(lldb::addr_t &value, + bool &value_is_offset, UUID &uuid, ObjectFile::BinaryType &type) { - address = LLDB_INVALID_ADDRESS; + value = LLDB_INVALID_ADDRESS; + value_is_offset = false; uuid.Clear(); return false; } diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h index 2f95c2643318..ba96d080f908 100644 --- a/lldb/include/lldb/Target/LanguageRuntime.h +++ b/lldb/include/lldb/Target/LanguageRuntime.h @@ -141,7 +141,7 @@ class LanguageRuntime : public Runtime, public PluginInterface { return false; } - virtual void SymbolsDidLoad(const ModuleList &module_list) { return; } + virtual void SymbolsDidLoad(const ModuleList &module_list) {} virtual lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread, bool stop_others) = 0; diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 118fdfd85fa9..422f90d807a7 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -360,7 +360,6 @@ template class RangeVector { m_entries.erase(next); } } - return; } Collection m_entries; diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index f5cabc02bd84..b459d6b7ec3b 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -747,6 +747,7 @@ enum BasicType { eBasicTypeWChar, eBasicTypeSignedWChar, eBasicTypeUnsignedWChar, + eBasicTypeChar8, eBasicTypeChar16, eBasicTypeChar32, eBasicTypeShort, diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp index 7107768ba884..c6bc3288c4b2 100644 --- a/lldb/source/API/SBFrame.cpp +++ b/lldb/source/API/SBFrame.cpp @@ -119,15 +119,13 @@ SBSymbolContext SBFrame::GetSymbolContext(uint32_t resolve_scope) const { std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); SymbolContextItem scope = static_cast(resolve_scope); - StackFrame *frame = nullptr; Target *target = exe_ctx.GetTargetPtr(); Process *process = exe_ctx.GetProcessPtr(); if (target && process) { Process::StopLocker stop_locker; if (stop_locker.TryLock(&process->GetRunLock())) { - frame = exe_ctx.GetFramePtr(); - if (frame) - sb_sym_ctx.SetSymbolContext(&frame->GetSymbolContext(scope)); + if (StackFrame *frame = exe_ctx.GetFramePtr()) + sb_sym_ctx = frame->GetSymbolContext(scope); } } diff --git a/lldb/source/API/SBSymbolContext.cpp b/lldb/source/API/SBSymbolContext.cpp index 488d49884903..89fe051658ff 100644 --- a/lldb/source/API/SBSymbolContext.cpp +++ b/lldb/source/API/SBSymbolContext.cpp @@ -22,12 +22,10 @@ SBSymbolContext::SBSymbolContext() : m_opaque_up() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContext); } -SBSymbolContext::SBSymbolContext(const SymbolContext *sc_ptr) : m_opaque_up() { +SBSymbolContext::SBSymbolContext(const SymbolContext &sc) + : m_opaque_up(std::make_unique(sc)) { LLDB_RECORD_CONSTRUCTOR(SBSymbolContext, - (const lldb_private::SymbolContext *), sc_ptr); - - if (sc_ptr) - m_opaque_up = std::make_unique(*sc_ptr); + (const lldb_private::SymbolContext &), sc); } SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) : m_opaque_up() { @@ -49,13 +47,6 @@ const SBSymbolContext &SBSymbolContext::operator=(const SBSymbolContext &rhs) { return LLDB_RECORD_RESULT(*this); } -void SBSymbolContext::SetSymbolContext(const SymbolContext *sc_ptr) { - if (sc_ptr) - m_opaque_up = std::make_unique(*sc_ptr); - else - m_opaque_up->Clear(true); -} - 
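// Annotation (not part of the patch): with SetSymbolContext() removed, callers
// construct or assign from a SymbolContext directly through the new
// const-reference constructor, e.g.
//   SymbolContext sc = ...;
//   SBSymbolContext sb_sc(sc);   // or: sb_sc = sc;
// which is what the SBFrame.cpp hunk above and the SBSymbolContextList.cpp
// hunk below now do.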
bool SBSymbolContext::IsValid() const { LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, IsValid); return this->operator bool(); @@ -237,7 +228,7 @@ template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBSymbolContext, ()); LLDB_REGISTER_CONSTRUCTOR(SBSymbolContext, - (const lldb_private::SymbolContext *)); + (const lldb_private::SymbolContext &)); LLDB_REGISTER_CONSTRUCTOR(SBSymbolContext, (const lldb::SBSymbolContext &)); LLDB_REGISTER_METHOD( const lldb::SBSymbolContext &, diff --git a/lldb/source/API/SBSymbolContextList.cpp b/lldb/source/API/SBSymbolContextList.cpp index 9db84dc1bf4b..70a8bbe6694c 100644 --- a/lldb/source/API/SBSymbolContextList.cpp +++ b/lldb/source/API/SBSymbolContextList.cpp @@ -56,9 +56,8 @@ SBSymbolContext SBSymbolContextList::GetContextAtIndex(uint32_t idx) { SBSymbolContext sb_sc; if (m_opaque_up) { SymbolContext sc; - if (m_opaque_up->GetContextAtIndex(idx, sc)) { - sb_sc.SetSymbolContext(&sc); - } + if (m_opaque_up->GetContextAtIndex(idx, sc)) + sb_sc = sc; } return LLDB_RECORD_RESULT(sb_sc); } diff --git a/lldb/source/API/SBTypeSummary.cpp b/lldb/source/API/SBTypeSummary.cpp index 3800ae940c70..2d7f8ef340c9 100644 --- a/lldb/source/API/SBTypeSummary.cpp +++ b/lldb/source/API/SBTypeSummary.cpp @@ -100,20 +100,11 @@ const lldb_private::TypeSummaryOptions &SBTypeSummaryOptions::ref() const { } SBTypeSummaryOptions::SBTypeSummaryOptions( - const lldb_private::TypeSummaryOptions *lldb_object_ptr) { + const lldb_private::TypeSummaryOptions &lldb_object) + : m_opaque_up(std::make_unique(lldb_object)) { LLDB_RECORD_CONSTRUCTOR(SBTypeSummaryOptions, - (const lldb_private::TypeSummaryOptions *), - lldb_object_ptr); - - SetOptions(lldb_object_ptr); -} - -void SBTypeSummaryOptions::SetOptions( - const lldb_private::TypeSummaryOptions *lldb_object_ptr) { - if (lldb_object_ptr) - m_opaque_up = std::make_unique(*lldb_object_ptr); - else - m_opaque_up = std::make_unique(); + (const lldb_private::TypeSummaryOptions &), + lldb_object); } SBTypeSummary::SBTypeSummary() : m_opaque_sp() { @@ -175,7 +166,7 @@ SBTypeSummary SBTypeSummary::CreateWithCallback(FormatCallback cb, const TypeSummaryOptions &opt) -> bool { SBStream stream; SBValue sb_value(valobj.GetSP()); - SBTypeSummaryOptions options(&opt); + SBTypeSummaryOptions options(opt); if (!cb(sb_value, options, stream)) return false; stm.Write(stream.GetData(), stream.GetSize()); @@ -492,7 +483,7 @@ void RegisterMethods(Registry &R) { LLDB_REGISTER_METHOD(void, SBTypeSummaryOptions, SetCapping, (lldb::TypeSummaryCapping)); LLDB_REGISTER_CONSTRUCTOR(SBTypeSummaryOptions, - (const lldb_private::TypeSummaryOptions *)); + (const lldb_private::TypeSummaryOptions &)); } template <> diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index c33aabddc7d1..d707190e0e6c 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -83,6 +83,7 @@ add_lldb_library(lldbCore lldbTarget lldbUtility lldbPluginCPlusPlusLanguage + lldbPluginDLanguage lldbPluginObjCLanguage ${LLDB_CURSES_LIBS} diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 9122117ef5ff..60207f75b7df 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1034,7 +1034,7 @@ class FieldDelegate { // navigates to the next or the previous field. This is particularly useful to // do in-field validation and error setting. Fields with internal navigation // should call this method on their fields. 
- virtual void FieldDelegateExitCallback() { return; } + virtual void FieldDelegateExitCallback() {} // Fields may have internal navigation, for instance, a List Field have // multiple internal elements, which needs to be navigated. To allow for this @@ -1055,10 +1055,10 @@ class FieldDelegate { virtual bool FieldDelegateOnLastOrOnlyElement() { return true; } // Select the first element in the field if multiple elements exists. - virtual void FieldDelegateSelectFirstElement() { return; } + virtual void FieldDelegateSelectFirstElement() {} // Select the last element in the field if multiple elements exists. - virtual void FieldDelegateSelectLastElement() { return; } + virtual void FieldDelegateSelectLastElement() {} // Returns true if the field has an error, false otherwise. virtual bool FieldDelegateHasError() { return false; } @@ -2000,7 +2000,6 @@ template class ListFieldDelegate : public FieldDelegate { void FieldDelegateSelectLastElement() override { m_selection_type = SelectionType::NewButton; - return; } int GetNumberOfFields() { return m_fields.size(); } @@ -2292,7 +2291,7 @@ class FormDelegate { virtual std::string GetName() = 0; - virtual void UpdateFieldsVisibility() { return; } + virtual void UpdateFieldsVisibility() {} FieldDelegate *GetField(uint32_t field_index) { if (field_index < m_fields.size()) @@ -3765,13 +3764,11 @@ class SearcherWindowDelegate : public WindowDelegate { void SelectNext() { if (m_selected_match != m_delegate_sp->GetNumberOfMatches() - 1) m_selected_match++; - return; } void SelectPrevious() { if (m_selected_match != 0) m_selected_match--; - return; } void ExecuteCallback(Window &window) { @@ -4608,9 +4605,7 @@ class TreeDelegate { virtual void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) = 0; virtual void TreeDelegateGenerateChildren(TreeItem &item) = 0; virtual void TreeDelegateUpdateSelection(TreeItem &root, int &selection_index, - TreeItem *&selected_item) { - return; - } + TreeItem *&selected_item) {} // This is invoked when a tree item is selected. If true is returned, the // views are updated. 
virtual bool TreeDelegateItemSelected(TreeItem &item) = 0; diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp index 0ef5f0adc832..0a60f8e97707 100644 --- a/lldb/source/DataFormatters/FormatManager.cpp +++ b/lldb/source/DataFormatters/FormatManager.cpp @@ -575,7 +575,14 @@ FormatManager::GetCandidateLanguages(lldb::LanguageType lang_type) { case lldb::eLanguageTypeC_plus_plus_03: case lldb::eLanguageTypeC_plus_plus_11: case lldb::eLanguageTypeC_plus_plus_14: - return {lldb::eLanguageTypeC_plus_plus, lldb::eLanguageTypeObjC}; + return { + lldb::eLanguageTypeC_plus_plus, + lldb::eLanguageTypeObjC, + }; + case lldb::eLanguageTypeD: + return { + lldb::eLanguageTypeD + }; default: return {lang_type}; } diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index f2d22f7ed9cc..3c4a87c27e20 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -483,8 +483,6 @@ void IRExecutionUnit::GetRunnableInfo(Status &error, lldb::addr_t &func_addr, func_addr = m_function_load_addr; func_end = m_function_end_load_addr; - - return; } IRExecutionUnit::~IRExecutionUnit() { diff --git a/lldb/source/Expression/IRMemoryMap.cpp b/lldb/source/Expression/IRMemoryMap.cpp index 4ae2724d4dd8..9eee5cf5b9a2 100644 --- a/lldb/source/Expression/IRMemoryMap.cpp +++ b/lldb/source/Expression/IRMemoryMap.cpp @@ -609,7 +609,6 @@ void IRMemoryMap::WriteScalarToMemory(lldb::addr_t process_address, error.SetErrorToGenericError(); error.SetErrorString("Couldn't write scalar: its size was zero"); } - return; } void IRMemoryMap::WritePointerToMemory(lldb::addr_t process_address, @@ -757,7 +756,6 @@ void IRMemoryMap::ReadScalarFromMemory(Scalar &scalar, error.SetErrorToGenericError(); error.SetErrorString("Couldn't read scalar: its size was zero"); } - return; } void IRMemoryMap::ReadPointerFromMemory(lldb::addr_t *address, @@ -773,8 +771,6 @@ void IRMemoryMap::ReadPointerFromMemory(lldb::addr_t *address, return; *address = pointer_scalar.ULongLong(); - - return; } void IRMemoryMap::GetMemoryData(DataExtractor &extractor, diff --git a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp index 63178e6c8a7a..2f08b9fa8857 100644 --- a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp +++ b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp @@ -86,7 +86,6 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, ExitWithError(error_fd, "DupDescriptor-dup2"); ::close(target_fd); - return; } [[noreturn]] static void ChildFunc(int error_fd, diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 301bf949feef..bd03f18b47c0 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2216,7 +2216,6 @@ void CommandInterpreter::BuildAliasCommandArgs(CommandObject *alias_cmd_obj, } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return; } int CommandInterpreter::GetOptionArgumentPosition(const char *in_string) { @@ -2563,8 +2562,6 @@ void CommandInterpreter::HandleCommands(const StringList &commands, result.SetStatus(eReturnStatusSuccessFinishResult); m_debugger.SetAsyncExecution(old_async_execution); - - return; } // Make flags that we can pass into the IOHandler so our delegates can do the diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp 
b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp index c3cfd0afe551..866b89f532ac 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp @@ -242,7 +242,6 @@ void DynamicLoaderMacOSXDYLD::DoInitialImageFetch() { ReadDYLDInfoFromMemoryAndSetNotificationCallback(0x8fe00000); } } - return; } // Assume that dyld is in memory at ADDR and try to parse it's load commands diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp index 85e2fcfc838c..7844f27139cf 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp @@ -211,7 +211,7 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body, Stmt **last_stmt_ptr = Body->body_end() - 1; Stmt *last_stmt = *last_stmt_ptr; - while (dyn_cast(last_stmt)) { + while (isa(last_stmt)) { if (last_stmt_ptr != Body->body_begin()) { last_stmt_ptr--; last_stmt = *last_stmt_ptr; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 80469e292580..6ed3cc9384f0 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -293,7 +293,7 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { NamedDecl *to_named_decl = dyn_cast(to); // Check if we already completed this type. - if (m_decls_already_completed.count(to_named_decl) != 0) + if (m_decls_already_completed.contains(to_named_decl)) return; // Queue this type to be completed. m_decls_to_complete.insert(to_named_decl); @@ -806,7 +806,7 @@ void ClangASTImporter::ForgetSource(clang::ASTContext *dst_ast, md->removeOriginsWithContext(src_ast); } -ClangASTImporter::MapCompleter::~MapCompleter() { return; } +ClangASTImporter::MapCompleter::~MapCompleter() {} llvm::Expected ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl *From) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 410d8a95cb12..510352e8c173 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -513,8 +513,6 @@ void ClangASTSource::FindExternalLexicalDecls( // is consulted again when a clang::DeclContext::lookup is called. 
const_cast(decl_context)->setMustBuildLookupTable(); } - - return; } void ClangASTSource::FindExternalVisibleDecls(NameSearchContext &context) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h index 3afd1fd5f2d1..f3fec3f944a1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h @@ -59,7 +59,7 @@ class ClangASTSource : public clang::ExternalASTSource, GetExternalCXXBaseSpecifiers(uint64_t Offset) override { return nullptr; } - void MaterializeVisibleDecls(const clang::DeclContext *DC) { return; } + void MaterializeVisibleDecls(const clang::DeclContext *DC) {} void InstallASTContext(TypeSystemClang &ast_context); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 846c1597292b..4af5d41a5921 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1954,8 +1954,6 @@ void ClangExpressionDeclMap::AddContextClassType(NameSearchContext &context, return; context.AddNamedDecl(typedef_decl); - - return; } void ClangExpressionDeclMap::AddOneType(NameSearchContext &context, diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp index a6e36d81b950..8b132b54b7e6 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp @@ -353,7 +353,7 @@ class ValidPointerChecker : public Instrumenter { } bool InspectInstruction(llvm::Instruction &i) override { - if (dyn_cast(&i) || dyn_cast(&i)) + if (isa(&i) || isa(&i)) RegisterInstruction(i); return true; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp index f80dc2b14467..e0e41925f7ef 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp @@ -1255,7 +1255,7 @@ bool IRForTarget::MaybeHandleVariable(Value *llvm_value_ptr) { m_decl_map->AddValueToStruct(named_decl, lldb_private::ConstString(name), llvm_value_ptr, *value_size, value_alignment); - } else if (dyn_cast(llvm_value_ptr)) { + } else if (isa(llvm_value_ptr)) { LLDB_LOG(log, "Function pointers aren't handled right now"); return false; diff --git a/lldb/source/Plugins/Language/CMakeLists.txt b/lldb/source/Plugins/Language/CMakeLists.txt index 7869074566d1..6a4284d190be 100644 --- a/lldb/source/Plugins/Language/CMakeLists.txt +++ b/lldb/source/Plugins/Language/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(ClangCommon) add_subdirectory(CPlusPlus) +add_subdirectory(D) add_subdirectory(ObjC) add_subdirectory(ObjCPlusPlus) diff --git a/lldb/source/Plugins/Language/D/CMakeLists.txt b/lldb/source/Plugins/Language/D/CMakeLists.txt new file mode 100644 index 000000000000..b9c33b461a64 --- /dev/null +++ b/lldb/source/Plugins/Language/D/CMakeLists.txt @@ -0,0 +1,13 @@ +add_lldb_library(lldbPluginDLanguage PLUGIN + DLanguage.cpp + DTypeUtils.cpp + DFormatterFunctions.cpp + + LINK_LIBS + lldbCore + lldbDataFormatters + lldbSymbol + lldbTarget + LINK_COMPONENTS + Support +) diff --git a/lldb/source/Plugins/Language/D/DFormatterFunctions.cpp b/lldb/source/Plugins/Language/D/DFormatterFunctions.cpp new file mode 
100644 index 000000000000..c117bdd07fb1 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DFormatterFunctions.cpp @@ -0,0 +1,139 @@ +//===-- DFormatterFunctions.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DFormatterFunctions.h" +#include "DTypeUtils.h" + +#include "lldb/Utility/ConstString.h" +#include "lldb/DataFormatters/FormatClasses.h" +#include "lldb/DataFormatters/FormattersHelpers.h" +#include "lldb/DataFormatters/StringPrinter.h" + +using namespace lldb_private; +using namespace lldb_private::formatters; +using namespace lldb; + +/* DStringSliceSummaryProvider */ + +namespace { +class DSliceSyntheticFrontEnd : public SyntheticChildrenFrontEnd { +public: + DSliceSyntheticFrontEnd(ValueObject &valobj) + : SyntheticChildrenFrontEnd(valobj) { + Update(); + } + + ~DSliceSyntheticFrontEnd() override = default; + + size_t CalculateNumChildren() override { return m_len; } + + lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + if (idx < m_len) { + ValueObjectSP &cached = m_children[idx]; + if (!cached) { + StreamString idx_name; + idx_name.Printf("[%" PRIu64 "]", (uint64_t)idx); + lldb::addr_t object_at_idx = m_base_data_address; + object_at_idx += idx * m_type.GetByteSize(nullptr).getValueOr(1); + cached = CreateValueObjectFromAddress( + idx_name.GetString(), object_at_idx, + m_backend.GetExecutionContextRef(), m_type); + } + return cached; + } + return ValueObjectSP(); + } + + bool Update() override { + size_t old_count = m_len; + + if (!IsDSlice(m_backend, &m_type, &m_base_data_address, &m_len)) + return old_count == 0; + + m_children.clear(); // ptr + m_children.clear(); // length + + return old_count == m_len; + } + + bool MightHaveChildren() override { return true; } + + size_t GetIndexOfChildWithName(ConstString name) override { + return ExtractIndexFromString(name.AsCString()); + } + +private: + CompilerType m_type; + lldb::addr_t m_base_data_address; + uint64_t m_len; + std::map m_children; +}; + +} // end of anonymous namespace + +SyntheticChildrenFrontEnd * lldb_private::formatters::DSliceSyntheticFrontEndCreator( + CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { + if (!valobj_sp) + return nullptr; + + lldb::ProcessSP process_sp(valobj_sp->GetProcessSP()); + if (!process_sp) + return nullptr; + return new DSliceSyntheticFrontEnd(*valobj_sp); +} + +bool lldb_private::formatters::DStringSliceSummaryProvider( + ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) { + DataExtractor data; + Status error; + valobj.GetData(data, error); + + if (error.Fail()) + return false; + + CompilerType Type; + addr_t AddrData; + uint64_t Len; + // is a D slice for sure, just fetching values + IsDSlice(valobj, &Type, &AddrData, &Len); + + TargetSP target_sp = valobj.GetTargetSP(); + if (!target_sp) + return false; + + StringPrinter::ReadStringAndDumpToStreamOptions options(valobj); + options.SetLocation(AddrData); + options.SetTargetSP(target_sp); + options.SetStream(&stream); + options.SetQuote('"'); + options.SetNeedsZeroTermination(false); + options.SetBinaryZeroIsTerminator(true); + options.SetSourceSize(Len); + options.SetHasSourceSize(true); + + llvm::Optional SizeOpt = Type.GetByteSize(nullptr); + uint64_t ByteSize = SizeOpt.hasValue() ? 
*SizeOpt : 1; + + switch (ByteSize) { + case 1: + return StringPrinter::ReadStringAndDumpToStream< + StringPrinter::StringElementType::UTF8>(options); + case 2: + options.SetSuffixToken("w"); + return StringPrinter::ReadStringAndDumpToStream< + StringPrinter::StringElementType::UTF16>(options); + case 4: + options.SetSuffixToken("d"); + return StringPrinter::ReadStringAndDumpToStream< + StringPrinter::StringElementType::UTF32>(options); + default: + stream.Printf("size for string element is not valid"); + return true; + } + return true; +} diff --git a/lldb/source/Plugins/Language/D/DFormatterFunctions.h b/lldb/source/Plugins/Language/D/DFormatterFunctions.h new file mode 100644 index 000000000000..df843e7a1c67 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DFormatterFunctions.h @@ -0,0 +1,28 @@ +//===-- DFormatterFunctions.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_D_DFORMATTERFUNCTIONS_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_D_DFORMATTERFUNCTIONS_H + +#include "lldb/lldb-forward.h" +#include "lldb/DataFormatters/TypeSynthetic.h" + +namespace lldb_private { +namespace formatters { + +SyntheticChildrenFrontEnd * +DSliceSyntheticFrontEndCreator(CXXSyntheticChildren *, + lldb::ValueObjectSP valobj_sp); + +bool DStringSliceSummaryProvider( + ValueObject &valobj, Stream &stream, const TypeSummaryOptions &); + +} // namespace formatters +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_D_DFORMATTERFUNCTIONS_H diff --git a/lldb/source/Plugins/Language/D/DLanguage.cpp b/lldb/source/Plugins/Language/D/DLanguage.cpp new file mode 100644 index 000000000000..75dd8bc0a822 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DLanguage.cpp @@ -0,0 +1,145 @@ +//===-- DLanguage.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DLanguage.h" +#include "DTypeUtils.h" +#include "DFormatterFunctions.h" + +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Mangled.h" +#include "lldb/DataFormatters/DataVisualization.h" +#include "lldb/DataFormatters/FormattersHelpers.h" +#include "lldb/lldb-enumerations.h" + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(DLanguage) + +void DLanguage::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), "D Language", + CreateInstance); +} + +void DLanguage::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +bool DLanguage::IsNilReference(ValueObject &valobj) { + // not an object from D + if (valobj.GetObjectRuntimeLanguage() != eLanguageTypeD) + return false; + + // a normal pointer + if (valobj.IsPointerType()) + { + bool canReadValue; + bool isZero = valobj.GetValueAsUnsigned(0, &canReadValue) == 0; + return canReadValue && isZero; + } + + lldb::addr_t valobj_addr; + uint64_t length; + + // not a slice + if (!IsDSlice(valobj, nullptr, &valobj_addr, &length)) + return false; + + // in D an empty array == null + if (valobj_addr == 0 || length == 0) + return true; + + // non-empty array with a non-null pointer + return false; +} + +HardcodedFormatters::HardcodedSyntheticFinder +DLanguage::GetHardcodedSynthetics() { + static llvm::once_flag g_initialize; + static HardcodedFormatters::HardcodedSyntheticFinder g_formatters; + + llvm::call_once(g_initialize, []() -> void { + g_formatters.push_back([](lldb_private::ValueObject &valobj, + lldb::DynamicValueType, + FormatManager & + fmt_mgr) -> SyntheticChildren::SharedPointer { + static CXXSyntheticChildren::SharedPointer formatter_sp( + new CXXSyntheticChildren( + SyntheticChildren::Flags() + .SetCascades(true) + .SetSkipPointers(true) + .SetSkipReferences(true) + .SetNonCacheable(true), + "D array slice synthetic children", + lldb_private::formatters::DSliceSyntheticFrontEndCreator)); + + if (IsDSlice(valobj, nullptr, nullptr, nullptr) && !IsDStringSlice(valobj, nullptr, nullptr, nullptr)) + return formatter_sp; + + return nullptr; + }); + }); + + return g_formatters; +} + +HardcodedFormatters::HardcodedSummaryFinder +DLanguage::GetHardcodedSummaries() { + static llvm::once_flag g_initialize; + static HardcodedFormatters::HardcodedSummaryFinder g_formatters; + + llvm::call_once(g_initialize, []() -> void { + g_formatters.push_back( + [](lldb_private::ValueObject &valobj, lldb::DynamicValueType, + FormatManager &fmt_mgr) -> TypeSummaryImpl::SharedPointer { + static CXXFunctionSummaryFormat::SharedPointer formatter_sp( + new CXXFunctionSummaryFormat( + TypeSummaryImpl::Flags() + .SetCascades(true) + .SetSkipPointers(true) + .SetSkipReferences(true) + .SetNonCacheable(true) + .SetDontShowChildren(true) + .SetDontShowValue(true) + .SetShowMembersOneLiner(false) + .SetHideItemNames(false), + lldb_private::formatters::DStringSliceSummaryProvider, + "D string slice summary provider")); + + if (IsDStringSlice(valobj, nullptr, nullptr, nullptr)) + return formatter_sp; + + return nullptr; + }); + }); + + return g_formatters; +} + +bool DLanguage::IsSourceFile(llvm::StringRef file_path) const { + const auto suffixes = {".d", ".dd", ".di"}; + for (const auto *const suffix : suffixes) + if (file_path.endswith_insensitive(suffix)) + return true; + + // not a D source file + return false; +} + +Language 
*DLanguage::CreateInstance(lldb::LanguageType language) { + if (language == eLanguageTypeD) + return new DLanguage(); + + // not D + return nullptr; +} + +bool DLanguage::SymbolNameFitsToLanguage(Mangled mangled) const { + const char *mangled_name = mangled.GetMangledName().GetCString(); + return mangled_name && Mangled::GetManglingScheme(mangled_name) == Mangled::eManglingSchemeD; +} diff --git a/lldb/source/Plugins/Language/D/DLanguage.h b/lldb/source/Plugins/Language/D/DLanguage.h new file mode 100644 index 000000000000..2fd70cb09f88 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DLanguage.h @@ -0,0 +1,58 @@ +//===-- DLanguage.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_D_DLANGUAGE_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_D_DLANGUAGE_H + +#include "lldb/Target/Language.h" +#include "llvm/ADT/StringRef.h" + +namespace lldb_private { + +class DLanguage : public Language { + public: + DLanguage() = default; + + ~DLanguage() override = default; + + // Static Functions + static void Initialize(); + + static void Terminate(); + + static lldb_private::Language *CreateInstance(lldb::LanguageType language); + + lldb::LanguageType GetLanguageType() const override { + return lldb::eLanguageTypeD; + } + + bool IsNilReference(ValueObject &valobj) override; + + llvm::StringRef GetNilReferenceSummaryString() override { return "null"; } + + static llvm::StringRef GetPluginNameStatic() { return "dlang"; } + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + bool IsSourceFile(llvm::StringRef file_path) const override; + + /* lldb::TypeCategoryImplSP GetFormatters() override; */ + + HardcodedFormatters::HardcodedSyntheticFinder + GetHardcodedSynthetics() override; + + HardcodedFormatters::HardcodedSummaryFinder + GetHardcodedSummaries() override; + + bool SymbolNameFitsToLanguage(Mangled mangled) const override; + +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_D_DLANGUAGE_H diff --git a/lldb/source/Plugins/Language/D/DTypeUtils.cpp b/lldb/source/Plugins/Language/D/DTypeUtils.cpp new file mode 100644 index 000000000000..cdea6f88823b --- /dev/null +++ b/lldb/source/Plugins/Language/D/DTypeUtils.cpp @@ -0,0 +1,106 @@ +//===-- DTypeUtils.cpp ------------------------------------------*---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DTypeUtils.h" + +#include "lldb/Symbol/CompilerType.h" +#include "lldb/Utility/ConstString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace lldb; +using namespace llvm; + +namespace lldb_private { + +bool IsDSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData, + uint64_t* Len) +{ + if (Valobj.MightHaveChildren()) + { + size_t nchildren = Valobj.GetNumChildren(); + + // doesn't have exactly two children + if (nchildren != 2) + return false; + + ConstString ptr_name("ptr"); + ConstString length_name("length"); + + // fetch slice fields + ValueObjectSP ptr_sp = Valobj.GetChildMemberWithName(ptr_name, true); + ValueObjectSP length_sp = Valobj.GetChildMemberWithName(length_name, true); + + // doesn't have those two exact children + if (!ptr_sp || !length_sp) + return false; + + CompilerType PteType = ptr_sp->GetCompilerType().GetPointeeType(); + // isn't a pointer + if (!PteType.IsValid()) + return false; + + bool success; + uint64_t lenVal = length_sp->GetValueAsUnsigned(0, &success); + // can't fetch unsigned value + if (!success) + return false; + + if (Type != nullptr) + *Type = PteType; + if (AddrData != nullptr) + *AddrData = ptr_sp->GetPointerValue(); + if (Len != nullptr) + *Len = lenVal; + + return true; + } + return false; +} + +bool IsDStringSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData, + uint64_t* Len) +{ + CompilerType TypeRet; + bool IsSlice = IsDSlice(Valobj, &TypeRet, AddrData, Len); + if (!IsSlice) + return false; + + if (Type != nullptr) + *Type = TypeRet; + + return IsCharType(TypeRet); +} + +bool IsCharType(CompilerType Type) +{ + switch(Type.GetFormat()) + { + case eFormatUnicode8: + case eFormatUnicode16: + case eFormatUnicode32: + case eFormatChar: + case eFormatCharPrintable: + return true; + case eFormatInvalid: break; + default: + return false; + } + + // fall back to char type names if no proper encoding is found + return StringSwitch<bool>(Type.GetDisplayTypeName().GetCString()) + .Cases( + "char", "char8_t", // UTF-8 + "wchar", "char16_t", // UTF-16 + "dchar", "char32_t", // UTF-32 + "wchar_t", // platform dependent + true) + .Default(false); +} + +} // namespace lldb_private diff --git a/lldb/source/Plugins/Language/D/DTypeUtils.h b/lldb/source/Plugins/Language/D/DTypeUtils.h new file mode 100644 index 000000000000..3dabac665345 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DTypeUtils.h @@ -0,0 +1,29 @@ +//===-- DTypeUtils.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
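// Editorial sketch, not part of the patch: how a caller is expected to use
// the helpers above.  The wrapper function is hypothetical; only the
// IsDSlice()/IsDStringSlice() signatures come from the patch.
static void DescribeDSlice(lldb_private::ValueObject &valobj) {
  lldb_private::CompilerType elem_type;
  lldb::addr_t data_addr = LLDB_INVALID_ADDRESS;
  uint64_t len = 0;
  if (lldb_private::IsDSlice(valobj, &elem_type, &data_addr, &len)) {
    // valobj is a D slice: 'len' elements of 'elem_type' starting at
    // 'data_addr'.
  }
  // Any out-parameter may be nullptr when only the yes/no answer matters,
  // as DLanguage::IsNilReference() does above.
  bool is_string = lldb_private::IsDStringSlice(valobj, nullptr, nullptr, nullptr);
  (void)is_string;
}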
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_D_DTYPEUTILS_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_D_DTYPEUTILS_H + +#include "lldb/lldb-forward.h" +#include "lldb/Core/ValueObject.h" + +using namespace lldb; + +namespace lldb_private { + +bool IsDSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData, + uint64_t* Len); + +bool IsDStringSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData, + uint64_t* Len); + +bool IsCharType(CompilerType Type); + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_D_DTYPEUTILS_H diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp index 9bc40c16e5d0..af11109ae45d 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp @@ -80,8 +80,6 @@ class lldb_private::AppleObjCExternalASTSource LLDB_LOG(log, " AOEAS::CT Before:\n{1}", ClangUtil::DumpDecl(tag_decl)); LLDB_LOG(log, " AOEAS::CT After:{1}", ClangUtil::DumpDecl(tag_decl)); - - return; } void CompleteType(clang::ObjCInterfaceDecl *interface_decl) override { @@ -107,7 +105,6 @@ class lldb_private::AppleObjCExternalASTSource LLDB_LOGF(log, " [CT] After:"); LLDB_LOG(log, " [CT] {0}", ClangUtil::DumpDecl(interface_decl)); } - return; } bool layoutRecordType( diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e0087dbd4941..e72d55dd2aba 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4647,7 +4647,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { // Add symbols from the trie to the symbol table. 
for (auto &e : external_sym_trie_entries) { - if (symbols_added.find(e.entry.address) != symbols_added.end()) + if (symbols_added.contains(e.entry.address)) continue; // Find the section that this trie address is in, use that to annotate @@ -5620,10 +5620,15 @@ addr_t ObjectFileMachO::GetAddressMask() { return mask; } -bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, +bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, + bool &value_is_offset, + UUID &uuid, ObjectFile::BinaryType &type) { - address = LLDB_INVALID_ADDRESS; + value = LLDB_INVALID_ADDRESS; + value_is_offset = false; uuid.Clear(); + uint32_t log2_pagesize = 0; // not currently passed up to caller + uint32_t platform = 0; // not currently passed up to caller ModuleSP module_sp(GetModule()); if (module_sp) { std::lock_guard guard(module_sp->GetMutex()); @@ -5641,6 +5646,26 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, uint64_t fileoff = m_data.GetU64_unchecked(&offset); uint64_t size = m_data.GetU64_unchecked(&offset); + // struct main_bin_spec + // { + // uint32_t version; // currently 2 + // uint32_t type; // 0 == unspecified, 1 == kernel, + // // 2 == user process, + // // 3 == standalone binary + // uint64_t address; // UINT64_MAX if address not specified + // uint64_t slide; // slide, UINT64_MAX if unspecified + // // 0 if no slide needs to be applied to + // // file address + // uuid_t uuid; // all zero's if uuid not specified + // uint32_t log2_pagesize; // process page size in log base 2, + // // e.g. 4k pages are 12. + // // 0 for unspecified + // uint32_t platform; // The Mach-O platform for this corefile. + // // 0 for unspecified. + // // The values are defined in + // // , PLATFORM_*. + // } __attribute((packed)); + // "main bin spec" (main binary specification) data payload is // formatted: // uint32_t version [currently 1] @@ -5656,14 +5681,25 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, if (strcmp("main bin spec", data_owner) == 0 && size >= 32) { offset = fileoff; uint32_t version; - if (m_data.GetU32(&offset, &version, 1) != nullptr && version == 1) { + if (m_data.GetU32(&offset, &version, 1) != nullptr && version <= 2) { uint32_t binspec_type = 0; uuid_t raw_uuid; memset(raw_uuid, 0, sizeof(uuid_t)); - if (m_data.GetU32(&offset, &binspec_type, 1) && - m_data.GetU64(&offset, &address, 1) && - m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) { + if (!m_data.GetU32(&offset, &binspec_type, 1)) + return false; + if (!m_data.GetU64(&offset, &value, 1)) + return false; + uint64_t slide = LLDB_INVALID_ADDRESS; + if (version > 1 && !m_data.GetU64(&offset, &slide, 1)) + return false; + if (value == LLDB_INVALID_ADDRESS && + slide != LLDB_INVALID_ADDRESS) { + value = slide; + value_is_offset = true; + } + + if (m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) { uuid = UUID::fromOptionalData(raw_uuid, sizeof(uuid_t)); // convert the "main bin spec" type into our // ObjectFile::BinaryType enum @@ -5681,6 +5717,10 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, type = eBinaryTypeStandalone; break; } + if (!m_data.GetU32(&offset, &log2_pagesize, 1)) + return false; + if (version > 1 && !m_data.GetU32(&offset, &platform, 1)) + return false; return true; } } @@ -7006,7 +7046,17 @@ bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) { for (const MachOCorefileImageEntry &image : image_infos.all_image_infos) { ModuleSpec module_spec; module_spec.GetUUID() = 
image.uuid; - module_spec.GetFileSpec() = FileSpec(image.filename.c_str()); + if (image.filename.empty()) { + char namebuf[80]; + if (image.load_address != LLDB_INVALID_ADDRESS) + snprintf(namebuf, sizeof(namebuf), "mem-image-0x%" PRIx64, + image.load_address); + else + snprintf(namebuf, sizeof(namebuf), "mem-image+0x%" PRIx64, image.slide); + module_spec.GetFileSpec() = FileSpec(namebuf); + } else { + module_spec.GetFileSpec() = FileSpec(image.filename.c_str()); + } if (image.currently_executing) { Symbols::DownloadObjectAndSymbolFile(module_spec, true); if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) { diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h index 084b0ac110ba..c666ef32b85e 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h @@ -120,7 +120,7 @@ class ObjectFileMachO : public lldb_private::ObjectFile { lldb::addr_t GetAddressMask() override; - bool GetCorefileMainBinaryInfo(lldb::addr_t &address, + bool GetCorefileMainBinaryInfo(lldb::addr_t &value, bool &value_is_offset, lldb_private::UUID &uuid, ObjectFile::BinaryType &type) override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp index 69692ddc77c4..8f27f6a39f1b 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp @@ -265,12 +265,11 @@ CoreSimulatorSupport::Device PlatformAppleSimulator::GetSimulatorDevice() { } #endif -bool PlatformAppleSimulator::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { - if (idx >= m_supported_triples.size()) - return false; - arch = ArchSpec(m_supported_triples[idx]); - return true; +std::vector PlatformAppleSimulator::GetSupportedArchitectures() { + std::vector result(m_supported_triples.size()); + llvm::transform(m_supported_triples, result.begin(), + [](llvm::StringRef triple) { return ArchSpec(triple); }); + return result; } PlatformSP PlatformAppleSimulator::CreateInstance( @@ -380,10 +379,11 @@ Status PlatformAppleSimulator::ResolveExecutable( // so ask the platform for the architectures that we should be using (in // the correct order) and see if we can find a match that way StreamString arch_names; + llvm::ListSeparator LS; ArchSpec platform_arch; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex( - idx, resolved_module_spec.GetArchitecture()); - ++idx) { + for (const ArchSpec &arch : GetSupportedArchitectures()) { + resolved_module_spec.GetArchitecture() = arch; + // Only match x86 with x86 and x86_64 with x86_64... 
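// Editorial sketch, not part of the patch: the version-2 "main bin spec"
// LC_NOTE payload that ObjectFileMachO::GetCorefileMainBinaryInfo() (hunk
// above) now reads field by field.  This restates the struct comment from
// that hunk as code for reference.
struct MainBinSpecV2 {
  uint32_t version;       // currently 2
  uint32_t type;          // 0 unspecified, 1 kernel, 2 user process, 3 standalone
  uint64_t address;       // UINT64_MAX if not specified
  uint64_t slide;         // UINT64_MAX if unspecified, 0 if no slide is needed
  uint8_t uuid[16];       // all zeroes if not specified
  uint32_t log2_pagesize; // e.g. 12 for 4k pages, 0 if unspecified
  uint32_t platform;      // PLATFORM_* value from <mach-o/loader.h>, 0 if unspecified
} __attribute__((packed));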
if (!module_spec.GetArchitecture().IsValid() || module_spec.GetArchitecture().GetCore() == @@ -398,9 +398,7 @@ Status PlatformAppleSimulator::ResolveExecutable( error.SetErrorToGenericError(); } - if (idx > 0) - arch_names.PutCString(", "); - arch_names.PutCString(platform_arch.GetArchitectureName()); + arch_names << LS << platform_arch.GetArchitectureName(); } } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h index 583399073c58..e87636d9999c 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h @@ -65,8 +65,7 @@ class PlatformAppleSimulator : public PlatformDarwin { lldb_private::Target &target, lldb_private::Status &error) override; - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; lldb_private::Status ResolveExecutable( const lldb_private::ModuleSpec &module_spec, lldb::ModuleSP &module_sp, diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index f5ce3017a8f1..de64426d7b64 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -514,45 +514,19 @@ bool PlatformDarwin::ModuleIsExcludedForUnconstrainedSearches( return obj_type == ObjectFile::eTypeDynamicLinker; } -bool PlatformDarwin::x86GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +void PlatformDarwin::x86GetSupportedArchitectures( + std::vector &archs) { ArchSpec host_arch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault); - if (host_arch.GetCore() == ArchSpec::eCore_x86_64_x86_64h) { - switch (idx) { - case 0: - arch = host_arch; - return true; - - case 1: - arch.SetTriple("x86_64-apple-macosx"); - return true; - - case 2: - arch = HostInfo::GetArchitecture(HostInfo::eArchKind32); - return true; + archs.push_back(host_arch); - default: - return false; - } + if (host_arch.GetCore() == ArchSpec::eCore_x86_64_x86_64h) { + archs.push_back(ArchSpec("x86_64-apple-macosx")); + archs.push_back(HostInfo::GetArchitecture(HostInfo::eArchKind32)); } else { - if (idx == 0) { - arch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault); - return arch.IsValid(); - } else if (idx == 1) { - ArchSpec platform_arch( - HostInfo::GetArchitecture(HostInfo::eArchKindDefault)); - ArchSpec platform_arch64( - HostInfo::GetArchitecture(HostInfo::eArchKind64)); - if (platform_arch.IsExactMatch(platform_arch64)) { - // This macosx platform supports both 32 and 64 bit. Since we already - // returned the 64 bit arch for idx == 0, return the 32 bit arch for - // idx == 1 - arch = HostInfo::GetArchitecture(HostInfo::eArchKind32); - return arch.IsValid(); - } - } + ArchSpec host_arch64 = HostInfo::GetArchitecture(HostInfo::eArchKind64); + if (host_arch.IsExactMatch(host_arch64)) + archs.push_back(HostInfo::GetArchitecture(HostInfo::eArchKind32)); } - return false; } static llvm::ArrayRef GetCompatibleArchs(ArchSpec::Core core) { @@ -669,21 +643,19 @@ const char *PlatformDarwin::GetCompatibleArch(ArchSpec::Core core, size_t idx) { /// The architecture selection rules for arm processors These cpu subtypes have /// distinct names (e.g. armv7f) but armv7 binaries run fine on an armv7f /// processor. 
-bool PlatformDarwin::ARMGetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +void PlatformDarwin::ARMGetSupportedArchitectures( + std::vector &archs) { const ArchSpec system_arch = GetSystemArchitecture(); const ArchSpec::Core system_core = system_arch.GetCore(); - if (const char *compatible_arch = GetCompatibleArch(system_core, idx)) { + const char *compatible_arch; + for (unsigned idx = 0; + (compatible_arch = GetCompatibleArch(system_core, idx)); ++idx) { llvm::Triple triple; triple.setArchName(compatible_arch); triple.setVendor(llvm::Triple::VendorType::Apple); - arch.SetTriple(triple); - return true; + archs.push_back(ArchSpec(triple)); } - - arch.Clear(); - return false; } static FileSpec GetXcodeSelectPath() { @@ -1367,11 +1339,3 @@ FileSpec PlatformDarwin::GetCurrentCommandLineToolsDirectory() { return FileSpec(FindComponentInPath(fspec.GetPath(), "CommandLineTools")); return {}; } - -std::vector PlatformDarwin::GetSupportedArchitectures() { - std::vector result; - ArchSpec arch; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(idx, arch); ++idx) - result.push_back(arch); - return result; -} diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h index 1a97c22cafff..bbb2a336c56e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h @@ -60,11 +60,9 @@ class PlatformDarwin : public PlatformPOSIX { bool ModuleIsExcludedForUnconstrainedSearches( lldb_private::Target &target, const lldb::ModuleSP &module_sp) override; - bool ARMGetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch); + void ARMGetSupportedArchitectures(std::vector &archs); - bool x86GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch); + void x86GetSupportedArchitectures(std::vector &archs); uint32_t GetResumeCountForLaunchInfo( lldb_private::ProcessLaunchInfo &launch_info) override; @@ -102,8 +100,6 @@ class PlatformDarwin : public PlatformPOSIX { /// located in. 
static lldb_private::FileSpec GetCurrentCommandLineToolsDirectory(); - std::vector GetSupportedArchitectures() override; - protected: static const char *GetCompatibleArch(lldb_private::ArchSpec::Core core, size_t idx); @@ -174,10 +170,6 @@ class PlatformDarwin : public PlatformPOSIX { static std::string FindComponentInPath(llvm::StringRef path, llvm::StringRef component); - virtual bool - GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) = 0; - std::string m_developer_directory; llvm::StringMap m_sdk_path; std::mutex m_sdk_path_mutex; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp index 823c5a2fd198..abb3b30e175a 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp @@ -911,13 +911,14 @@ Status PlatformDarwinKernel::ExamineKextForMatchingUUID( return {}; } -bool PlatformDarwinKernel::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformDarwinKernel::GetSupportedArchitectures() { + std::vector result; #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) - return ARMGetSupportedArchitectureAtIndex(idx, arch); + ARMGetSupportedArchitectures(result); #else - return x86GetSupportedArchitectureAtIndex(idx, arch); + x86GetSupportedArchitectures(result); #endif + return result; } void PlatformDarwinKernel::CalculateTrapHandlerSymbolNames() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h index 4cb6c10fb833..738594bda140 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h @@ -56,8 +56,7 @@ class PlatformDarwinKernel : public PlatformDarwin { llvm::SmallVectorImpl *old_modules, bool *did_create_ptr) override; - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; bool SupportsModules() override { return false; } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp index ac9604f681a0..3f2fb53ad654 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp @@ -133,39 +133,23 @@ ConstString PlatformMacOSX::GetSDKDirectory(lldb_private::Target &target) { return {}; } -bool PlatformMacOSX::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformMacOSX::GetSupportedArchitectures() { + std::vector result; #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) // macOS for ARM64 support both native and translated x86_64 processes - if (!m_num_arm_arches || idx < m_num_arm_arches) { - bool res = ARMGetSupportedArchitectureAtIndex(idx, arch); - if (res) - return true; - if (!m_num_arm_arches) - m_num_arm_arches = idx; - } + ARMGetSupportedArchitectures(result); - // We can't use x86GetSupportedArchitectureAtIndex() because it uses + // We can't use x86GetSupportedArchitectures() because it uses // the system architecture for some of its return values and also // has a 32bits variant. 
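// Editorial sketch, not part of the patch: the calling pattern this refactor
// enables.  Callers such as the ResolveExecutable() hunks no longer probe
// GetSupportedArchitectureAtIndex(idx, arch) in an index loop; they iterate
// the vector returned by GetSupportedArchitectures() and use
// llvm::ListSeparator for the ", "-joined list of tried architectures.
// The helper below is hypothetical; the APIs it calls come from the patch.
static std::string JoinSupportedArchNames(lldb_private::Platform &platform) {
  lldb_private::StreamString names;
  llvm::ListSeparator LS; // prints nothing before the first item, ", " after
  for (const lldb_private::ArchSpec &arch : platform.GetSupportedArchitectures())
    names << LS << arch.GetArchitectureName();
  return std::string(names.GetString());
}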
- if (idx == m_num_arm_arches) { - arch.SetTriple("x86_64-apple-macosx"); - return true; - } else if (idx == m_num_arm_arches + 1) { - arch.SetTriple("x86_64-apple-ios-macabi"); - return true; - } else if (idx == m_num_arm_arches + 2) { - arch.SetTriple("arm64-apple-ios"); - return true; - } else if (idx == m_num_arm_arches + 3) { - arch.SetTriple("arm64e-apple-ios"); - return true; - } - - return false; + result.push_back(ArchSpec("x86_64-apple-macosx")); + result.push_back(ArchSpec("x86_64-apple-ios-macabi")); + result.push_back(ArchSpec("arm64-apple-ios")); + result.push_back(ArchSpec("arm64e-apple-ios")); #else - return x86GetSupportedArchitectureAtIndex(idx, arch); + x86GetSupportedArchitectures(result); #endif + return result; } lldb_private::Status PlatformMacOSX::GetSharedModule( diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h index 404f93348dfa..113214befa92 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h @@ -47,8 +47,7 @@ class PlatformMacOSX : public PlatformDarwin { return PlatformDarwin::GetFile(source, destination); } - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; lldb_private::ConstString GetSDKDirectory(lldb_private::Target &target) override; @@ -59,11 +58,6 @@ class PlatformMacOSX : public PlatformDarwin { return PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( target, options, lldb_private::XcodeSDK::Type::MacOSX); } - -private: -#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) - uint32_t m_num_arm_arches = 0; -#endif }; #endif // LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMMACOSX_H diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp index b7ba2ea29bf6..495e8615c612 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp @@ -137,34 +137,8 @@ llvm::StringRef PlatformRemoteAppleBridge::GetDescriptionStatic() { return "Remote BridgeOS platform plug-in."; } -bool PlatformRemoteAppleBridge::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { - ArchSpec system_arch(GetSystemArchitecture()); - - const ArchSpec::Core system_core = system_arch.GetCore(); - switch (system_core) { - default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-bridgeos"); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-bridgeos"); - return true; - default: - break; - } - break; - } - arch.Clear(); - return false; +std::vector PlatformRemoteAppleBridge::GetSupportedArchitectures() { + return {ArchSpec("arm64-apple-bridgeos")}; } llvm::StringRef PlatformRemoteAppleBridge::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h index 1f4a0b0b0d4d..0d11df01d633 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h @@ -40,8 +40,7 @@ class PlatformRemoteAppleBridge : public PlatformRemoteDarwinDevice { llvm::StringRef GetDescription() override { return GetDescriptionStatic(); } - bool GetSupportedArchitectureAtIndex(uint32_t 
idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp index 42d0de0e36c7..d11e13cd8239 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp @@ -132,90 +132,24 @@ llvm::StringRef PlatformRemoteAppleTV::GetDescriptionStatic() { return "Remote Apple TV platform plug-in."; } -bool PlatformRemoteAppleTV::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformRemoteAppleTV::GetSupportedArchitectures() { ArchSpec system_arch(GetSystemArchitecture()); const ArchSpec::Core system_core = system_arch.GetCore(); switch (system_core) { default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-tvos"); - return true; - case 1: - arch.SetTriple("armv7s-apple-tvos"); - return true; - case 2: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 3: - arch.SetTriple("thumbv7s-apple-tvos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-tvos"); - return true; - case 1: - arch.SetTriple("armv7s-apple-tvos"); - return true; - case 2: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 3: - arch.SetTriple("thumbv7s-apple-tvos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; + return {ArchSpec("arm64-apple-tvos"), ArchSpec("armv7s-apple-tvos"), + ArchSpec("armv7-apple-tvos"), ArchSpec("thumbv7s-apple-tvos"), + ArchSpec("thumbv7-apple-tvos")}; case ArchSpec::eCore_arm_armv7s: - switch (idx) { - case 0: - arch.SetTriple("armv7s-apple-tvos"); - return true; - case 1: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 2: - arch.SetTriple("thumbv7s-apple-tvos"); - return true; - case 3: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; + return {ArchSpec("armv7s-apple-tvos"), ArchSpec("armv7-apple-tvos"), + ArchSpec("thumbv7s-apple-tvos"), ArchSpec("thumbv7-apple-tvos")}; case ArchSpec::eCore_arm_armv7: - switch (idx) { - case 0: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 1: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; + return {ArchSpec("armv7-apple-tvos"), ArchSpec("thumbv7-apple-tvos")}; } - arch.Clear(); - return false; } llvm::StringRef PlatformRemoteAppleTV::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h index ce120082a74f..f6dbc7cac793 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h @@ -40,8 +40,7 @@ class PlatformRemoteAppleTV : public PlatformRemoteDarwinDevice { llvm::StringRef GetDescription() override { return GetDescriptionStatic(); } - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp 
b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp index 849ff3a50486..e1d78be6c145 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp @@ -143,154 +143,39 @@ llvm::StringRef PlatformRemoteAppleWatch::GetDescriptionStatic() { PlatformRemoteAppleWatch::PlatformRemoteAppleWatch() : PlatformRemoteDarwinDevice() {} -bool PlatformRemoteAppleWatch::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformRemoteAppleWatch::GetSupportedArchitectures() { ArchSpec system_arch(GetSystemArchitecture()); const ArchSpec::Core system_core = system_arch.GetCore(); switch (system_core) { default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 3: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 6: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 7: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 3: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 6: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 7: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return { + ArchSpec("arm64-apple-watchos"), ArchSpec("armv7k-apple-watchos"), + ArchSpec("armv7s-apple-watchos"), ArchSpec("armv7-apple-watchos"), + ArchSpec("thumbv7k-apple-watchos"), ArchSpec("thumbv7-apple-watchos"), + ArchSpec("thumbv7s-apple-watchos"), ArchSpec("arm64_32-apple-watchos")}; case ArchSpec::eCore_arm_armv7k: - switch (idx) { - case 0: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 3: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 6: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return { + ArchSpec("armv7k-apple-watchos"), ArchSpec("armv7s-apple-watchos"), + ArchSpec("armv7-apple-watchos"), ArchSpec("thumbv7k-apple-watchos"), + ArchSpec("thumbv7-apple-watchos"), ArchSpec("thumbv7s-apple-watchos"), + ArchSpec("arm64_32-apple-watchos")}; case ArchSpec::eCore_arm_armv7s: - switch (idx) { - case 0: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 3: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 6: - 
arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return { + ArchSpec("armv7s-apple-watchos"), ArchSpec("armv7k-apple-watchos"), + ArchSpec("armv7-apple-watchos"), ArchSpec("thumbv7k-apple-watchos"), + ArchSpec("thumbv7-apple-watchos"), ArchSpec("thumbv7s-apple-watchos"), + ArchSpec("arm64_32-apple-watchos")}; case ArchSpec::eCore_arm_armv7: - switch (idx) { - case 0: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 3: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 4: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return {ArchSpec("armv7-apple-watchos"), ArchSpec("armv7k-apple-watchos"), + ArchSpec("thumbv7k-apple-watchos"), + ArchSpec("thumbv7-apple-watchos"), + ArchSpec("arm64_32-apple-watchos")}; } - arch.Clear(); - return false; } llvm::StringRef PlatformRemoteAppleWatch::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h index abecbd733505..07d1cff081dd 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h @@ -43,8 +43,7 @@ class PlatformRemoteAppleWatch : public PlatformRemoteDarwinDevice { // lldb_private::Platform functions - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp index 97b2b4a3b740..5bc90e77ba53 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp @@ -90,9 +90,9 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable( // so ask the platform for the architectures that we should be using (in // the correct order) and see if we can find a match that way StreamString arch_names; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex( - idx, resolved_module_spec.GetArchitecture()); - ++idx) { + llvm::ListSeparator LS; + for (const ArchSpec &arch : GetSupportedArchitectures()) { + resolved_module_spec.GetArchitecture() = arch; error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, nullptr, nullptr, nullptr); // Did we find an executable using one of the @@ -103,10 +103,7 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable( error.SetErrorToGenericError(); } - if (idx > 0) - arch_names.PutCString(", "); - arch_names.PutCString( - resolved_module_spec.GetArchitecture().GetArchitectureName()); + arch_names << LS << arch.GetArchitectureName(); } if (error.Fail() || !exe_module_sp) { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp index 72c3480da661..1e969f05e15b 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp @@ -124,35 +124,19 @@ PlatformSP PlatformRemoteMacOSX::CreateInstance(bool force, return PlatformSP(); } -bool PlatformRemoteMacOSX::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { 
+std::vector PlatformRemoteMacOSX::GetSupportedArchitectures() { // macOS for ARM64 support both native and translated x86_64 processes - if (!m_num_arm_arches || idx < m_num_arm_arches) { - bool res = ARMGetSupportedArchitectureAtIndex(idx, arch); - if (res) - return true; - if (!m_num_arm_arches) - m_num_arm_arches = idx; - } + std::vector result; + ARMGetSupportedArchitectures(result); - // We can't use x86GetSupportedArchitectureAtIndex() because it uses + // We can't use x86GetSupportedArchitectures() because it uses // the system architecture for some of its return values and also // has a 32bits variant. - if (idx == m_num_arm_arches) { - arch.SetTriple("x86_64-apple-macosx"); - return true; - } else if (idx == m_num_arm_arches + 1) { - arch.SetTriple("x86_64-apple-ios-macabi"); - return true; - } else if (idx == m_num_arm_arches + 2) { - arch.SetTriple("arm64-apple-ios"); - return true; - } else if (idx == m_num_arm_arches + 3) { - arch.SetTriple("arm64e-apple-ios"); - return true; - } - - return false; + result.push_back(ArchSpec("x86_64-apple-macosx")); + result.push_back(ArchSpec("x86_64-apple-ios-macabi")); + result.push_back(ArchSpec("arm64-apple-ios")); + result.push_back(ArchSpec("arm64e-apple-ios")); + return result; } lldb_private::Status PlatformRemoteMacOSX::GetFileWithUUID( diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h index 83335cda1f80..8d08a51b31b8 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h @@ -42,15 +42,11 @@ class PlatformRemoteMacOSX : public virtual PlatformRemoteDarwinDevice { const lldb_private::UUID *uuid_ptr, lldb_private::FileSpec &local_file) override; - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; llvm::StringRef GetPlatformName() override; - -private: - uint32_t m_num_arm_arches = 0; }; #endif // LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEMACOSX_H diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp index 8824cab2fff2..616123698e66 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp @@ -133,9 +133,10 @@ llvm::StringRef PlatformRemoteiOS::GetDescriptionStatic() { PlatformRemoteiOS::PlatformRemoteiOS() : PlatformRemoteDarwinDevice() {} -bool PlatformRemoteiOS::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { - return ARMGetSupportedArchitectureAtIndex(idx, arch); +std::vector PlatformRemoteiOS::GetSupportedArchitectures() { + std::vector result; + ARMGetSupportedArchitectures(result); + return result; } llvm::StringRef PlatformRemoteiOS::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h index 302df33fc116..94be124d4310 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h @@ -39,8 +39,7 @@ class PlatformRemoteiOS : public PlatformRemoteDarwinDevice { // lldb_private::PluginInterface functions llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec 
&arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt b/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt index ad388ee76b07..ca89ab0b6dc9 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt +++ b/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt @@ -1,4 +1,12 @@ -if (NOT FBSDVMCore_FOUND) +set(FBSDKERNEL_LIBS) +if(FBSDVMCore_FOUND) + list(APPEND FBSDKERNEL_LIBS fbsdvmcore) +endif() +if(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + list(APPEND FBSDKERNEL_LIBS kvm) +endif() + +if (NOT FBSDKERNEL_LIBS) message(STATUS "Skipping FreeBSDKernel plugin due to missing libfbsdvmcore") return() endif() @@ -13,7 +21,7 @@ add_lldb_library(lldbPluginProcessFreeBSDKernel PLUGIN LINK_LIBS lldbCore lldbTarget - fbsdvmcore + ${FBSDKERNEL_LIBS} LINK_COMPONENTS Support ) diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp index 327e25acd589..339d33d25110 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp +++ b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp @@ -10,42 +10,91 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Target/DynamicLoader.h" +#include "Plugins/DynamicLoader/Static/DynamicLoaderStatic.h" #include "ProcessFreeBSDKernel.h" #include "ThreadFreeBSDKernel.h" -#include "Plugins/DynamicLoader/Static/DynamicLoaderStatic.h" +#if LLDB_ENABLE_FBSDVMCORE #include +#endif +#if defined(__FreeBSD__) +#include +#endif using namespace lldb; using namespace lldb_private; LLDB_PLUGIN_DEFINE(ProcessFreeBSDKernel) -ProcessFreeBSDKernel::ProcessFreeBSDKernel(lldb::TargetSP target_sp, - ListenerSP listener_sp, - const FileSpec &core_file, void *fvc) - : PostMortemProcess(target_sp, listener_sp), m_fvc(fvc) {} +namespace { -ProcessFreeBSDKernel::~ProcessFreeBSDKernel() { - if (m_fvc) - fvc_close(static_cast(m_fvc)); -} +#if LLDB_ENABLE_FBSDVMCORE +class ProcessFreeBSDKernelFVC : public ProcessFreeBSDKernel { +public: + ProcessFreeBSDKernelFVC(lldb::TargetSP target_sp, lldb::ListenerSP listener, + fvc_t *fvc); + + ~ProcessFreeBSDKernelFVC(); + + size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, + lldb_private::Status &error) override; + +private: + fvc_t *m_fvc; + + const char *GetError(); +}; +#endif // LLDB_ENABLE_FBSDVMCORE + +#if defined(__FreeBSD__) +class ProcessFreeBSDKernelKVM : public ProcessFreeBSDKernel { +public: + ProcessFreeBSDKernelKVM(lldb::TargetSP target_sp, lldb::ListenerSP listener, + kvm_t *fvc); + + ~ProcessFreeBSDKernelKVM(); + + size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, + lldb_private::Status &error) override; + +private: + kvm_t *m_kvm; + + const char *GetError(); +}; +#endif // defined(__FreeBSD__) + +} // namespace + +ProcessFreeBSDKernel::ProcessFreeBSDKernel(lldb::TargetSP target_sp, + ListenerSP listener_sp) + : PostMortemProcess(target_sp, listener_sp) {} lldb::ProcessSP ProcessFreeBSDKernel::CreateInstance(lldb::TargetSP target_sp, ListenerSP listener_sp, const FileSpec *crash_file, bool can_connect) { - lldb::ProcessSP process_sp; ModuleSP executable = target_sp->GetExecutableModule(); if (crash_file && !can_connect && executable) { - fvc_t *fvc = fvc_open( - executable->GetFileSpec().GetPath().c_str(), - crash_file->GetPath().c_str(), nullptr, nullptr, nullptr); +#if LLDB_ENABLE_FBSDVMCORE + fvc_t *fvc = + 
fvc_open(executable->GetFileSpec().GetPath().c_str(), + crash_file->GetPath().c_str(), nullptr, nullptr, nullptr); if (fvc) - process_sp = std::make_shared( - target_sp, listener_sp, *crash_file, fvc); + return std::make_shared(target_sp, listener_sp, + fvc); +#endif + +#if defined(__FreeBSD__) + kvm_t *kvm = + kvm_open2(executable->GetFileSpec().GetPath().c_str(), + crash_file->GetPath().c_str(), O_RDONLY, nullptr, nullptr); + if (kvm) + return std::make_shared(target_sp, listener_sp, + kvm); +#endif } - return process_sp; + return nullptr; } void ProcessFreeBSDKernel::Initialize() { @@ -107,20 +156,63 @@ Status ProcessFreeBSDKernel::DoLoadCore() { return Status(); } -size_t ProcessFreeBSDKernel::DoReadMemory(lldb::addr_t addr, void *buf, - size_t size, Status &error) { - ssize_t rd = fvc_read(static_cast(m_fvc), addr, buf, size); +DynamicLoader *ProcessFreeBSDKernel::GetDynamicLoader() { + if (m_dyld_up.get() == nullptr) + m_dyld_up.reset(DynamicLoader::FindPlugin( + this, DynamicLoaderStatic::GetPluginNameStatic())); + return m_dyld_up.get(); +} + +#if LLDB_ENABLE_FBSDVMCORE + +ProcessFreeBSDKernelFVC::ProcessFreeBSDKernelFVC(lldb::TargetSP target_sp, + ListenerSP listener_sp, + fvc_t *fvc) + : ProcessFreeBSDKernel(target_sp, listener_sp), m_fvc(fvc) {} + +ProcessFreeBSDKernelFVC::~ProcessFreeBSDKernelFVC() { + if (m_fvc) + fvc_close(m_fvc); +} + +size_t ProcessFreeBSDKernelFVC::DoReadMemory(lldb::addr_t addr, void *buf, + size_t size, Status &error) { + ssize_t rd = 0; + rd = fvc_read(m_fvc, addr, buf, size); if (rd < 0 || static_cast(rd) != size) { - error.SetErrorStringWithFormat("Reading memory failed: %s", - fvc_geterr(static_cast(m_fvc))); + error.SetErrorStringWithFormat("Reading memory failed: %s", GetError()); return rd > 0 ? rd : 0; } return rd; } -DynamicLoader *ProcessFreeBSDKernel::GetDynamicLoader() { - if (m_dyld_up.get() == nullptr) - m_dyld_up.reset(DynamicLoader::FindPlugin( - this, DynamicLoaderStatic::GetPluginNameStatic())); - return m_dyld_up.get(); +const char *ProcessFreeBSDKernelFVC::GetError() { return fvc_geterr(m_fvc); } + +#endif // LLDB_ENABLE_FBSDVMCORE + +#if defined(__FreeBSD__) + +ProcessFreeBSDKernelKVM::ProcessFreeBSDKernelKVM(lldb::TargetSP target_sp, + ListenerSP listener_sp, + kvm_t *fvc) + : ProcessFreeBSDKernel(target_sp, listener_sp), m_kvm(fvc) {} + +ProcessFreeBSDKernelKVM::~ProcessFreeBSDKernelKVM() { + if (m_kvm) + kvm_close(m_kvm); } + +size_t ProcessFreeBSDKernelKVM::DoReadMemory(lldb::addr_t addr, void *buf, + size_t size, Status &error) { + ssize_t rd = 0; + rd = kvm_read2(m_kvm, addr, buf, size); + if (rd < 0 || static_cast(rd) != size) { + error.SetErrorStringWithFormat("Reading memory failed: %s", GetError()); + return rd > 0 ? 
rd : 0; + } + return rd; +} + +const char *ProcessFreeBSDKernelKVM::GetError() { return kvm_geterr(m_kvm); } + +#endif // defined(__FreeBSD__) diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h index cd10728463b5..558eec5403db 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h +++ b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h @@ -13,10 +13,7 @@ class ProcessFreeBSDKernel : public lldb_private::PostMortemProcess { public: - ProcessFreeBSDKernel(lldb::TargetSP target_sp, lldb::ListenerSP listener, - const lldb_private::FileSpec &core_file, void *fvc); - - ~ProcessFreeBSDKernel() override; + ProcessFreeBSDKernel(lldb::TargetSP target_sp, lldb::ListenerSP listener); static lldb::ProcessSP CreateInstance(lldb::TargetSP target_sp, lldb::ListenerSP listener, @@ -44,17 +41,11 @@ class ProcessFreeBSDKernel : public lldb_private::PostMortemProcess { lldb_private::Status DoLoadCore() override; - size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, - lldb_private::Status &error) override; - lldb_private::DynamicLoader *GetDynamicLoader() override; protected: bool DoUpdateThreadList(lldb_private::ThreadList &old_thread_list, lldb_private::ThreadList &new_thread_list) override; - -private: - void *m_fvc; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_FREEBSDKERNEL_PROCESSFREEBSDKERNEL_H diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index 279ffb1f2a9c..d7651ce71da0 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -874,7 +874,7 @@ void NativeProcessLinux::MonitorSignal(const siginfo_t &info, // Check if debugger should stop at this signal or just ignore it and resume // the inferior. - if (m_signals_to_ignore.find(signo) != m_signals_to_ignore.end()) { + if (m_signals_to_ignore.contains(signo)) { ResumeThread(thread, thread.GetState(), signo); return; } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 07dfa5e04ee5..b5b105351de5 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -1006,6 +1006,23 @@ GDBRemoteCommunicationClient::GetProcessArchitecture() { return m_process_arch; } +bool GDBRemoteCommunicationClient::GetProcessStandaloneBinary( + UUID &uuid, addr_t &value, bool &value_is_offset) { + if (m_qProcessInfo_is_valid == eLazyBoolCalculate) + GetCurrentProcessInfo(); + + // Return true if we have a UUID or an address/offset of the + // main standalone / firmware binary being used. 
+ if (!m_process_standalone_uuid.IsValid() && + m_process_standalone_value == LLDB_INVALID_ADDRESS) + return false; + + uuid = m_process_standalone_uuid; + value = m_process_standalone_value; + value_is_offset = m_process_standalone_value_is_offset; + return true; +} + bool GDBRemoteCommunicationClient::GetGDBServerVersion() { if (m_qGDBServerVersion_is_valid == eLazyBoolCalculate) { m_gdb_server_name.clear(); @@ -2147,6 +2164,25 @@ bool GDBRemoteCommunicationClient::GetCurrentProcessInfo(bool allow_lazy) { } else if (name.equals("elf_abi")) { elf_abi = std::string(value); ++num_keys_decoded; + } else if (name.equals("main-binary-uuid")) { + m_process_standalone_uuid.SetFromStringRef(value); + ++num_keys_decoded; + } else if (name.equals("main-binary-slide")) { + StringExtractor extractor(value); + m_process_standalone_value = + extractor.GetU64(LLDB_INVALID_ADDRESS, 16); + if (m_process_standalone_value != LLDB_INVALID_ADDRESS) { + m_process_standalone_value_is_offset = true; + ++num_keys_decoded; + } + } else if (name.equals("main-binary-address")) { + StringExtractor extractor(value); + m_process_standalone_value = + extractor.GetU64(LLDB_INVALID_ADDRESS, 16); + if (m_process_standalone_value != LLDB_INVALID_ADDRESS) { + m_process_standalone_value_is_offset = false; + ++num_keys_decoded; + } } } if (num_keys_decoded > 0) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 6765372ce124..c69c33bb1c15 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -217,6 +217,9 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { const ArchSpec &GetProcessArchitecture(); + bool GetProcessStandaloneBinary(UUID &uuid, lldb::addr_t &value, + bool &value_is_offset); + void GetRemoteQSupported(); bool GetVContSupported(char flavor); @@ -584,6 +587,9 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { ArchSpec m_host_arch; ArchSpec m_process_arch; + UUID m_process_standalone_uuid; + lldb::addr_t m_process_standalone_value = LLDB_INVALID_ADDRESS; + bool m_process_standalone_value_is_offset = false; llvm::VersionTuple m_os_version; llvm::VersionTuple m_maccatalyst_version; std::string m_os_build; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 89702d176ef8..93fe36c0d9d6 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -561,6 +561,94 @@ Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) { } } + // The remote stub may know about the "main binary" in + // the context of a firmware debug session, and can + // give us a UUID and an address/slide of where the + // binary is loaded in memory. + UUID standalone_uuid; + addr_t standalone_value; + bool standalone_value_is_offset; + if (m_gdb_comm.GetProcessStandaloneBinary( + standalone_uuid, standalone_value, standalone_value_is_offset)) { + ModuleSP module_sp; + + if (standalone_uuid.IsValid()) { + ModuleSpec module_spec; + module_spec.GetUUID() = standalone_uuid; + + // Look up UUID in global module cache before attempting + // a more expensive search. 
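// Editorial sketch, not part of the patch: the three qProcessInfo keys that
// GetCurrentProcessInfo() (hunk above) now understands.  A hypothetical
// firmware stub reply could carry, alongside the existing keys:
//
//   main-binary-uuid:<standard UUID string>;
//   main-binary-address:fffffff007004000;   (absolute load address, hex)
//   main-binary-slide:4000;                 (offset from file addresses, hex)
//
// Address and slide are alternatives; whichever is present sets
// m_process_standalone_value and m_process_standalone_value_is_offset, which
// GetProcessStandaloneBinary() reports to ProcessGDBRemote::DoConnectRemote()
// in this hunk.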
+ Status error = ModuleList::GetSharedModule(module_spec, module_sp, + nullptr, nullptr, nullptr); + + if (!module_sp) { + // Force a an external lookup, if that tool is available. + if (!module_spec.GetSymbolFileSpec()) + Symbols::DownloadObjectAndSymbolFile(module_spec, true); + + if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) { + module_sp = std::make_shared(module_spec); + } + } + + // If we couldn't find the binary anywhere else, as a last resort, + // read it out of memory. + if (!module_sp.get() && standalone_value != LLDB_INVALID_ADDRESS && + !standalone_value_is_offset) { + char namebuf[80]; + snprintf(namebuf, sizeof(namebuf), "mem-image-0x%" PRIx64, + standalone_value); + module_sp = + ReadModuleFromMemory(FileSpec(namebuf), standalone_value); + } + + Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet( + LIBLLDB_LOG_DYNAMIC_LOADER)); + if (module_sp.get()) { + target.GetImages().AppendIfNeeded(module_sp, false); + + bool changed = false; + if (module_sp->GetObjectFile()) { + if (standalone_value != LLDB_INVALID_ADDRESS) { + if (log) + log->Printf("Loading binary UUID %s at %s 0x%" PRIx64, + standalone_uuid.GetAsString().c_str(), + standalone_value_is_offset ? "offset" : "address", + standalone_value); + module_sp->SetLoadAddress(target, standalone_value, + standalone_value_is_offset, changed); + } else { + // No address/offset/slide, load the binary at file address, + // offset 0. + if (log) + log->Printf("Loading binary UUID %s at file address", + standalone_uuid.GetAsString().c_str()); + const bool value_is_slide = true; + module_sp->SetLoadAddress(target, 0, value_is_slide, changed); + } + } else { + // In-memory image, load at its true address, offset 0. + if (log) + log->Printf("Loading binary UUID %s from memory", + standalone_uuid.GetAsString().c_str()); + const bool value_is_slide = true; + module_sp->SetLoadAddress(target, 0, value_is_slide, changed); + } + + ModuleList added_module; + added_module.Append(module_sp, false); + target.ModulesDidLoad(added_module); + } else { + if (log) + log->Printf("Unable to find binary with UUID %s and load it at " + "%s 0x%" PRIx64, + standalone_uuid.GetAsString().c_str(), + standalone_value_is_offset ? "offset" : "address", + standalone_value); + } + } + } + const StateType state = SetThreadStopInfo(response); if (state != eStateInvalid) { SetPrivateState(state); diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp index 6aed04565eb0..4b3e244dcb23 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp @@ -184,7 +184,8 @@ bool ProcessMachCore::GetDynamicLoaderAddress(lldb::addr_t addr) { // Try to load a file with that UUID into lldb, and if we have a load // address, set it correctly. Else assume that the binary was loaded // with no slide. -static bool load_standalone_binary(UUID uuid, addr_t addr, Target &target) { +static bool load_standalone_binary(UUID uuid, addr_t value, + bool value_is_offset, Target &target) { if (uuid.IsValid()) { ModuleSpec module_spec; module_spec.GetUUID() = uuid; @@ -205,18 +206,34 @@ static bool load_standalone_binary(UUID uuid, addr_t addr, Target &target) { } } - if (module_sp.get() && module_sp->GetObjectFile()) { + // If we couldn't find the binary anywhere else, as a last resort, + // read it out of memory in the corefile. 
+ if (!module_sp.get() && value != LLDB_INVALID_ADDRESS && !value_is_offset) { + char namebuf[80]; + snprintf(namebuf, sizeof(namebuf), "mem-image-0x%" PRIx64, value); + module_sp = + target.GetProcessSP()->ReadModuleFromMemory(FileSpec(namebuf), value); + } + + if (module_sp.get()) { target.SetArchitecture(module_sp->GetObjectFile()->GetArchitecture()); target.GetImages().AppendIfNeeded(module_sp, false); - Address base_addr = module_sp->GetObjectFile()->GetBaseAddress(); - addr_t slide = 0; - if (addr != LLDB_INVALID_ADDRESS && base_addr.IsValid()) { - addr_t file_load_addr = base_addr.GetFileAddress(); - slide = addr - file_load_addr; - } bool changed = false; - module_sp->SetLoadAddress(target, slide, true, changed); + if (module_sp->GetObjectFile()) { + if (value != LLDB_INVALID_ADDRESS) { + module_sp->SetLoadAddress(target, value, value_is_offset, changed); + } else { + // No address/offset/slide, load the binary at file address, + // offset 0. + const bool value_is_slide = true; + module_sp->SetLoadAddress(target, 0, value_is_slide, changed); + } + } else { + // In-memory image, load at its true address, offset 0. + const bool value_is_slide = true; + module_sp->SetLoadAddress(target, 0, value_is_slide, changed); + } ModuleList added_module; added_module.Append(module_sp, false); @@ -316,33 +333,38 @@ Status ProcessMachCore::DoLoadCore() { bool found_main_binary_definitively = false; - addr_t objfile_binary_addr; + addr_t objfile_binary_value; + bool objfile_binary_value_is_offset; UUID objfile_binary_uuid; ObjectFile::BinaryType type; - if (core_objfile->GetCorefileMainBinaryInfo(objfile_binary_addr, + if (core_objfile->GetCorefileMainBinaryInfo(objfile_binary_value, + objfile_binary_value_is_offset, objfile_binary_uuid, type)) { if (log) { log->Printf( "ProcessMachCore::DoLoadCore: using binary hint from 'main bin spec' " - "LC_NOTE with UUID %s address 0x%" PRIx64 " and type %d", - objfile_binary_uuid.GetAsString().c_str(), objfile_binary_addr, type); + "LC_NOTE with UUID %s value 0x%" PRIx64 + " value is offset %d and type %d", + objfile_binary_uuid.GetAsString().c_str(), objfile_binary_value, + objfile_binary_value_is_offset, type); } - if (objfile_binary_addr != LLDB_INVALID_ADDRESS) { + if (objfile_binary_value != LLDB_INVALID_ADDRESS && + !objfile_binary_value_is_offset) { if (type == ObjectFile::eBinaryTypeUser) { - m_dyld_addr = objfile_binary_addr; + m_dyld_addr = objfile_binary_value; m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); found_main_binary_definitively = true; } if (type == ObjectFile::eBinaryTypeKernel) { - m_mach_kernel_addr = objfile_binary_addr; + m_mach_kernel_addr = objfile_binary_value; m_dyld_plugin_name = DynamicLoaderDarwinKernel::GetPluginNameStatic(); found_main_binary_definitively = true; } } if (!found_main_binary_definitively) { // ObjectFile::eBinaryTypeStandalone, undeclared types - if (load_standalone_binary(objfile_binary_uuid, objfile_binary_addr, - GetTarget())) { + if (load_standalone_binary(objfile_binary_uuid, objfile_binary_value, + objfile_binary_value_is_offset, GetTarget())) { found_main_binary_definitively = true; m_dyld_plugin_name = DynamicLoaderStatic::GetPluginNameStatic(); } @@ -388,17 +410,21 @@ Status ProcessMachCore::DoLoadCore() { m_mach_kernel_addr = ident_binary_addr; found_main_binary_definitively = true; } else if (ident_uuid.IsValid()) { - if (load_standalone_binary(ident_uuid, ident_binary_addr, GetTarget())) { + // We have no address specified, only a UUID. Load it at the file + // address. 
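// Editorial sketch, not part of the patch: how the (value, value_is_offset)
// pair threads through to Module::SetLoadAddress() in both
// load_standalone_binary() above and ProcessGDBRemote::DoConnectRemote().
// 'module_sp', 'target' and 'changed' stand for the locals used there.
if (value != LLDB_INVALID_ADDRESS) {
  // value is either an absolute load address (value_is_offset == false) or
  // a slide to add to the file addresses (value_is_offset == true).
  module_sp->SetLoadAddress(target, value, value_is_offset, changed);
} else {
  // Nothing specified: leave the binary at its file addresses (slide of 0).
  module_sp->SetLoadAddress(target, 0, /*value_is_offset=*/true, changed);
}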
+ const bool value_is_offset = false; + if (load_standalone_binary(ident_uuid, ident_binary_addr, value_is_offset, + GetTarget())) { found_main_binary_definitively = true; m_dyld_plugin_name = DynamicLoaderStatic::GetPluginNameStatic(); } } } + bool did_load_extra_binaries = core_objfile->LoadCoreFileImages(*this); // If we have a "all image infos" LC_NOTE, try to load all of the // binaries listed, and set their Section load addresses in the Target. - if (found_main_binary_definitively == false && - core_objfile->LoadCoreFileImages(*this)) { + if (found_main_binary_definitively == false && did_load_extra_binaries) { m_dyld_plugin_name = DynamicLoaderDarwinKernel::GetPluginNameStatic(); found_main_binary_definitively = true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt index 550404314af5..04504c7f55a7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt @@ -11,7 +11,9 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN DebugNamesDWARFIndex.cpp DIERef.cpp DWARFAbbreviationDeclaration.cpp + DWARFASTParser.cpp DWARFASTParserClang.cpp + DWARFASTParserD.cpp DWARFAttribute.cpp DWARFBaseDIE.cpp DWARFCompileUnit.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp new file mode 100644 index 000000000000..bd933cbe7099 --- /dev/null +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -0,0 +1,313 @@ +//===-- DWARFASTParser.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DWARFASTParser.h" +#include "DWARFAttribute.h" +#include "DWARFDIE.h" +#include "DWARFUnit.h" + +#include "lldb/Core/Module.h" +#include "lldb/Core/ValueObject.h" +#include "lldb/Symbol/CompileUnit.h" +#include "lldb/Symbol/SymbolFile.h" +#include "lldb/Target/StackFrame.h" + +using namespace lldb; +using namespace lldb_private; + +llvm::Optional +DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, + const ExecutionContext *exe_ctx) { + SymbolFile::ArrayInfo array_info; + if (!parent_die) + return llvm::None; + + for (DWARFDIE die : parent_die.children()) { + const dw_tag_t tag = die.Tag(); + if (tag != DW_TAG_subrange_type) + continue; + + DWARFAttributes attributes; + const size_t num_child_attributes = die.GetAttributes(attributes); + if (num_child_attributes > 0) { + uint64_t num_elements = 0; + uint64_t lower_bound = 0; + uint64_t upper_bound = 0; + bool upper_bound_valid = false; + uint32_t i; + for (i = 0; i < num_child_attributes; ++i) { + const dw_attr_t attr = attributes.AttributeAtIndex(i); + DWARFFormValue form_value; + if (attributes.ExtractFormValueAtIndex(i, form_value)) { + switch (attr) { + case DW_AT_name: + break; + + case DW_AT_count: + if (DWARFDIE var_die = die.GetReferencedDIE(DW_AT_count)) { + if (var_die.Tag() == DW_TAG_variable) + if (exe_ctx) { + if (auto frame = exe_ctx->GetFrameSP()) { + Status error; + lldb::VariableSP var_sp; + auto valobj_sp = frame->GetValueForVariableExpressionPath( + var_die.GetName(), eNoDynamicValues, 0, var_sp, error); + if (valobj_sp) { + num_elements = valobj_sp->GetValueAsUnsigned(0); + break; + } + } + } + } else + 
num_elements = form_value.Unsigned(); + break; + + case DW_AT_bit_stride: + array_info.bit_stride = form_value.Unsigned(); + break; + + case DW_AT_byte_stride: + array_info.byte_stride = form_value.Unsigned(); + break; + + case DW_AT_lower_bound: + lower_bound = form_value.Unsigned(); + break; + + case DW_AT_upper_bound: + upper_bound_valid = true; + upper_bound = form_value.Unsigned(); + break; + + default: + break; + } + } + } + + if (num_elements == 0) { + if (upper_bound_valid && upper_bound >= lower_bound) + num_elements = upper_bound - lower_bound + 1; + } + + array_info.element_orders.push_back(num_elements); + } + } + return array_info; +} + +AccessType +DWARFASTParser::GetAccessTypeFromDWARF(uint32_t dwarf_accessibility) { + switch (dwarf_accessibility) { + case DW_ACCESS_public: + return eAccessPublic; + case DW_ACCESS_private: + return eAccessPrivate; + case DW_ACCESS_protected: + return eAccessProtected; + default: + break; + } + return eAccessNone; +} + +ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { + DWARFAttributes attributes; + size_t num_attributes = die.GetAttributes(attributes); + for (size_t i = 0; i < num_attributes; ++i) { + dw_attr_t attr = attributes.AttributeAtIndex(i); + DWARFFormValue form_value; + if (!attributes.ExtractFormValueAtIndex(i, form_value)) + continue; + switch (attr) { + case DW_AT_abstract_origin: + abstract_origin = form_value; + break; + + case DW_AT_accessibility: + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); + break; + + case DW_AT_artificial: + if (form_value.Boolean()) + setIsArtificial(); + break; + + case DW_AT_bit_stride: + bit_stride = form_value.Unsigned(); + break; + + case DW_AT_byte_size: + byte_size = form_value.Unsigned(); + break; + + case DW_AT_byte_stride: + byte_stride = form_value.Unsigned(); + break; + + case DW_AT_calling_convention: + calling_convention = form_value.Unsigned(); + break; + + case DW_AT_containing_type: + containing_type = form_value; + break; + + case DW_AT_decl_file: + // die.GetCU() can differ if DW_AT_specification uses DW_FORM_ref_addr. 
+ decl.SetFile( + attributes.CompileUnitAtIndex(i)->GetFile(form_value.Unsigned())); + break; + case DW_AT_decl_line: + decl.SetLine(form_value.Unsigned()); + break; + case DW_AT_decl_column: + decl.SetColumn(form_value.Unsigned()); + break; + + case DW_AT_declaration: + if (form_value.Boolean()) + setIsForwardDeclaration(); + break; + + case DW_AT_encoding: + encoding = form_value.Unsigned(); + break; + + case DW_AT_enum_class: + if (form_value.Boolean()) + setIsScopedEnum(); + break; + + case DW_AT_explicit: + if (form_value.Boolean()) + setIsExplicit(); + break; + + case DW_AT_external: + if (form_value.Unsigned()) + setIsExternal(); + break; + + case DW_AT_inline: + if (form_value.Boolean()) + setIsInline(); + break; + + case DW_AT_linkage_name: + case DW_AT_MIPS_linkage_name: + mangled_name = form_value.AsCString(); + break; + + case DW_AT_name: + name.SetCString(form_value.AsCString()); + break; + + case DW_AT_object_pointer: + object_pointer = form_value.Reference(); + break; + + case DW_AT_signature: + signature = form_value; + break; + + case DW_AT_specification: + specification = form_value; + break; + + case DW_AT_type: + type = form_value; + break; + + case DW_AT_virtuality: + if (form_value.Unsigned()) + setIsVirtual(); + break; + + case DW_AT_APPLE_objc_complete_type: + if (form_value.Signed()) + setIsObjCCompleteType(); + break; + + case DW_AT_APPLE_objc_direct: + setIsObjCDirectCall(); + break; + + case DW_AT_APPLE_runtime_class: + class_language = (LanguageType)form_value.Signed(); + break; + + case DW_AT_GNU_vector: + if (form_value.Boolean()) + setIsVector(); + break; + case DW_AT_export_symbols: + if (form_value.Boolean()) + setIsExportsSymbols(); + break; + } + } +} + +TypeSP DWARFASTParser::UpdateSymbolContextScopeForType( + const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) { + if (!type_sp) + return type_sp; + + SymbolFileDWARF *dwarf = die.GetDWARF(); + TypeList &type_list = dwarf->GetTypeList(); + DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); + dw_tag_t sc_parent_tag = sc_parent_die.Tag(); + + SymbolContextScope *symbol_context_scope = nullptr; + if (sc_parent_tag == DW_TAG_compile_unit || + sc_parent_tag == DW_TAG_partial_unit) { + symbol_context_scope = sc.comp_unit; + } else if (sc.function != nullptr && sc_parent_die) { + symbol_context_scope = + sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); + if (symbol_context_scope == nullptr) + symbol_context_scope = sc.function; + } else { + symbol_context_scope = sc.module_sp.get(); + } + + if (symbol_context_scope != nullptr) + type_sp->SetSymbolContextScope(symbol_context_scope); + + // We are ready to put this type into the uniqued list up at the module + // level. 
+  type_list.Insert(type_sp);
+
+  dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+  return type_sp;
+}
+
+lldb_private::Type::EncodingDataType DWARFASTParser::GetEncodingFromDWARFTypeTag(dw_tag_t tag)
+{
+  switch (tag) {
+  case DW_TAG_pointer_type:
+    return Type::eEncodingIsPointerUID;
+  case DW_TAG_reference_type:
+    return Type::eEncodingIsLValueReferenceUID;
+  case DW_TAG_rvalue_reference_type:
+    return Type::eEncodingIsRValueReferenceUID;
+  case DW_TAG_typedef:
+    return Type::eEncodingIsTypedefUID;
+  case DW_TAG_const_type:
+    return Type::eEncodingIsConstUID;
+  case DW_TAG_restrict_type:
+    return Type::eEncodingIsRestrictUID;
+  case DW_TAG_volatile_type:
+    return Type::eEncodingIsVolatileUID;
+  case DW_TAG_atomic_type:
+    return Type::eEncodingIsAtomicUID;
+  default:
+    return Type::eEncodingIsUID;
+  }
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
index 00123a4b9216..fe1945dff375 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
@@ -9,11 +9,17 @@
 #ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H
 #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H
 
+#include "DWARFDIE.h"
 #include "DWARFDefines.h"
+#include "DWARFFormValue.h"
+#include "lldb/Core/Declaration.h"
+#include "lldb/Utility/ConstString.h"
 #include "lldb/Core/PluginInterface.h"
 #include "lldb/Symbol/SymbolFile.h"
+#include "lldb/Symbol/Type.h"
 #include "lldb/Symbol/CompilerDecl.h"
 #include "lldb/Symbol/CompilerDeclContext.h"
+#include "lldb/lldb-enumerations.h"
 
 class DWARFDIE;
 namespace lldb_private {
@@ -54,6 +60,150 @@ class DWARFASTParser {
   static llvm::Optional<lldb_private::SymbolFile::ArrayInfo>
   ParseChildArrayInfo(const DWARFDIE &parent_die,
                       const lldb_private::ExecutionContext *exe_ctx = nullptr);
+
+  static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility);
+
+  static lldb_private::Type::EncodingDataType GetEncodingFromDWARFTypeTag(dw_tag_t tag);
+
+  /// If \p type_sp is valid, calculate and set its symbol context scope, and
+  /// update the type list for its backing symbol file.
+  ///
+  /// Returns \p type_sp.
+  lldb::TypeSP
+  UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc,
+                                  const DWARFDIE &die, lldb::TypeSP type_sp);
+};
+
+/// Parsed form of all attributes that are relevant for type reconstruction.
+/// Some attributes are relevant for all kinds of types (declaration), while
+/// others are only meaningful to a specific type (is_virtual).
+struct ParsedDWARFTypeAttributes {
+  explicit ParsedDWARFTypeAttributes(const DWARFDIE &die);
+
+  lldb::AccessType accessibility = lldb::eAccessNone;
+  const char *mangled_name = nullptr;
+  lldb_private::ConstString name;
+  lldb_private::Declaration decl;
+  DWARFDIE object_pointer;
+  DWARFFormValue abstract_origin;
+  DWARFFormValue containing_type;
+  DWARFFormValue signature;
+  DWARFFormValue specification;
+  DWARFFormValue type;
+  lldb::LanguageType class_language = lldb::eLanguageTypeUnknown;
+  llvm::Optional<uint64_t> byte_size;
+  size_t calling_convention = llvm::dwarf::DW_CC_normal;
+  uint32_t bit_stride = 0;
+  uint32_t byte_stride = 0;
+  uint32_t encoding = 0;
+  uint32_t attr_flags = 0;
+
+  // clang-format off
+  enum DWARFAttributeFlags : unsigned {
+    /// Represents no attributes.
+    eDWARFAttributeNone = 0u,
+    /// Whether it is an artificially generated symbol.
+    eDWARFAttributeArtificial = 1u << 0,
+    /// Whether the member function is marked explicit.
+    eDWARFAttributeExplicit = 1u << 1,
+    /// Whether it is a forward declaration.
+    eDWARFAttributeForwardDecl = 1u << 2,
+    /// Whether it is an inlined symbol.
+    eDWARFAttributeInline = 1u << 3,
+    /// Whether it is a scoped enumeration (class enum).
+    eDWARFAttributeScopedEnum = 1u << 4,
+    /// Whether it has the vector attribute.
+    eDWARFAttributeVector = 1u << 5,
+    /// Whether it has the virtuality attribute.
+    eDWARFAttributeVirtual = 1u << 6,
+    /// Whether it is an external symbol.
+    eDWARFAttributeExternal = 1u << 7,
+    /// Whether it exports symbols to the containing scope.
+    eDWARFAttributeExportSymbols = 1u << 8,
+    /// Whether it is an Objective-C direct call.
+    eDWARFAttributeObjCDirect = 1u << 9,
+    /// Whether it is an Objective-C complete type.
+    eDWARFAttributeObjCCompleteType = 1u << 10,
+    LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/eDWARFAttributeObjCCompleteType),
+  };
+  // clang-format on
+
+  void setIsArtificial() {
+    attr_flags |= eDWARFAttributeArtificial;
+  }
+  bool isArtificial() const {
+    return attr_flags & eDWARFAttributeArtificial;
+  }
+
+  void setIsExplicit() {
+    attr_flags |= eDWARFAttributeExplicit;
+  }
+  bool isExplicit() const {
+    return attr_flags & eDWARFAttributeExplicit;
+  }
+
+  void setIsForwardDeclaration() {
+    attr_flags |= eDWARFAttributeForwardDecl;
+  }
+  bool isForwardDeclaration() const {
+    return attr_flags & eDWARFAttributeForwardDecl;
+  }
+
+  void setIsInline() {
+    attr_flags |= eDWARFAttributeInline;
+  }
+  bool isInline() const {
+    return attr_flags & eDWARFAttributeInline;
+  }
+
+  void setIsScopedEnum() {
+    attr_flags |= eDWARFAttributeScopedEnum;
+  }
+  bool isScopedEnum() const {
+    return attr_flags & eDWARFAttributeScopedEnum;
+  }
+
+  void setIsVector() {
+    attr_flags |= eDWARFAttributeVector;
+  }
+  bool isVector() const {
+    return attr_flags & eDWARFAttributeVector;
+  }
+
+  void setIsVirtual() {
+    attr_flags |= eDWARFAttributeVirtual;
+  }
+  bool isVirtual() const {
+    return attr_flags & eDWARFAttributeVirtual;
+  }
+
+  void setIsExternal() {
+    attr_flags |= eDWARFAttributeExternal;
+  }
+  bool isExternal() const {
+    return attr_flags & eDWARFAttributeExternal;
+  }
+
+  void setIsExportsSymbols() {
+    attr_flags |= eDWARFAttributeExportSymbols;
+  }
+  bool isExportsSymbols() const {
+    return attr_flags & eDWARFAttributeExportSymbols;
+  }
+
+  void setIsObjCDirectCall() {
+    attr_flags |= eDWARFAttributeObjCDirect;
+  }
+  bool isObjCDirectCall() const {
+    return attr_flags & eDWARFAttributeObjCDirect;
+  }
+
+  void setIsObjCCompleteType() {
+    attr_flags |= eDWARFAttributeObjCCompleteType;
+  }
+  bool isObjCCompleteType() const {
+    return attr_flags & eDWARFAttributeObjCCompleteType;
+  }
 };
 
 #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 4ac6e165dda3..0b096f2a4dc4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -8,6 +8,7 @@
 #include 
+#include "DWARFASTParser.h"
 #include "DWARFASTParserClang.h"
 #include "DWARFDebugInfo.h"
 #include "DWARFDeclContext.h"
@@ -62,20 +63,6 @@ DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast)
 DWARFASTParserClang::~DWARFASTParserClang() = default;
 
-static AccessType DW_ACCESS_to_AccessType(uint32_t dwarf_accessibility) {
-  switch (dwarf_accessibility) {
-  case DW_ACCESS_public:
-    return eAccessPublic;
-  case DW_ACCESS_private:
-    return eAccessPrivate;
-  case DW_ACCESS_protected:
- return eAccessProtected; - default: - break; - } - return eAccessNone; -} - static bool DeclKindIsCXXClass(clang::Decl::Kind decl_kind) { switch (decl_kind) { case clang::Decl::CXXRecord: @@ -299,135 +286,6 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, ForcefullyCompleteType(type); } -ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { - DWARFAttributes attributes; - size_t num_attributes = die.GetAttributes(attributes); - for (size_t i = 0; i < num_attributes; ++i) { - dw_attr_t attr = attributes.AttributeAtIndex(i); - DWARFFormValue form_value; - if (!attributes.ExtractFormValueAtIndex(i, form_value)) - continue; - switch (attr) { - case DW_AT_abstract_origin: - abstract_origin = form_value; - break; - - case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); - break; - - case DW_AT_artificial: - is_artificial = form_value.Boolean(); - break; - - case DW_AT_bit_stride: - bit_stride = form_value.Unsigned(); - break; - - case DW_AT_byte_size: - byte_size = form_value.Unsigned(); - break; - - case DW_AT_byte_stride: - byte_stride = form_value.Unsigned(); - break; - - case DW_AT_calling_convention: - calling_convention = form_value.Unsigned(); - break; - - case DW_AT_containing_type: - containing_type = form_value; - break; - - case DW_AT_decl_file: - // die.GetCU() can differ if DW_AT_specification uses DW_FORM_ref_addr. - decl.SetFile( - attributes.CompileUnitAtIndex(i)->GetFile(form_value.Unsigned())); - break; - case DW_AT_decl_line: - decl.SetLine(form_value.Unsigned()); - break; - case DW_AT_decl_column: - decl.SetColumn(form_value.Unsigned()); - break; - - case DW_AT_declaration: - is_forward_declaration = form_value.Boolean(); - break; - - case DW_AT_encoding: - encoding = form_value.Unsigned(); - break; - - case DW_AT_enum_class: - is_scoped_enum = form_value.Boolean(); - break; - - case DW_AT_explicit: - is_explicit = form_value.Boolean(); - break; - - case DW_AT_external: - if (form_value.Unsigned()) - storage = clang::SC_Extern; - break; - - case DW_AT_inline: - is_inline = form_value.Boolean(); - break; - - case DW_AT_linkage_name: - case DW_AT_MIPS_linkage_name: - mangled_name = form_value.AsCString(); - break; - - case DW_AT_name: - name.SetCString(form_value.AsCString()); - break; - - case DW_AT_object_pointer: - object_pointer = form_value.Reference(); - break; - - case DW_AT_signature: - signature = form_value; - break; - - case DW_AT_specification: - specification = form_value; - break; - - case DW_AT_type: - type = form_value; - break; - - case DW_AT_virtuality: - is_virtual = form_value.Boolean(); - break; - - case DW_AT_APPLE_objc_complete_type: - is_complete_objc_class = form_value.Signed(); - break; - - case DW_AT_APPLE_objc_direct: - is_objc_direct_call = true; - break; - - case DW_AT_APPLE_runtime_class: - class_language = (LanguageType)form_value.Signed(); - break; - - case DW_AT_GNU_vector: - is_vector = form_value.Boolean(); - break; - case DW_AT_export_symbols: - exports_symbols = form_value.Boolean(); - break; - } - } -} - static std::string GetUnitName(const DWARFDIE &die) { if (DWARFUnit *unit = die.GetCU()) return unit->GetAbsolutePath().GetPath(); @@ -554,7 +412,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU()); Type::ResolveState resolve_state = Type::ResolveState::Unresolved; - Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; + 
Type::EncodingDataType encoding_data_type = GetEncodingFromDWARFTypeTag(tag); TypeSP type_sp; CompilerType clang_type; @@ -630,31 +488,6 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, attrs.name.GetStringRef(), attrs.encoding, attrs.byte_size.getValueOr(0) * 8); break; - - case DW_TAG_pointer_type: - encoding_data_type = Type::eEncodingIsPointerUID; - break; - case DW_TAG_reference_type: - encoding_data_type = Type::eEncodingIsLValueReferenceUID; - break; - case DW_TAG_rvalue_reference_type: - encoding_data_type = Type::eEncodingIsRValueReferenceUID; - break; - case DW_TAG_typedef: - encoding_data_type = Type::eEncodingIsTypedefUID; - break; - case DW_TAG_const_type: - encoding_data_type = Type::eEncodingIsConstUID; - break; - case DW_TAG_restrict_type: - encoding_data_type = Type::eEncodingIsRestrictUID; - break; - case DW_TAG_volatile_type: - encoding_data_type = Type::eEncodingIsVolatileUID; - break; - case DW_TAG_atomic_type: - encoding_data_type = Type::eEncodingIsAtomicUID; - break; } if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID || @@ -777,7 +610,7 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); TypeSP type_sp; - if (attrs.is_forward_declaration) { + if (attrs.isForwardDeclaration()) { type_sp = ParseTypeFromClangModule(sc, die, log); if (type_sp) return type_sp; @@ -846,7 +679,7 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, attrs.name.GetStringRef(), GetClangDeclContextContainingDIE(die, nullptr), GetOwningClangModule(die), attrs.decl, enumerator_clang_type, - attrs.is_scoped_enum); + attrs.isScopedEnum()); } else { enumerator_clang_type = m_ast.GetEnumerationIntegerType(clang_type); } @@ -1025,8 +858,8 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, clang::ObjCMethodDecl *objc_method_decl = m_ast.AddMethodToObjCObjectType( class_opaque_type, attrs.name.GetCString(), clang_type, - attrs.accessibility, attrs.is_artificial, is_variadic, - attrs.is_objc_direct_call); + attrs.accessibility, attrs.isArtificial(), is_variadic, + attrs.isObjCDirectCall()); type_handled = objc_method_decl != NULL; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); @@ -1174,14 +1007,14 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, m_ast.AddMethodToCXXRecordType( class_opaque_type.GetOpaqueQualType(), attrs.name.GetCString(), attrs.mangled_name, - clang_type, attrs.accessibility, attrs.is_virtual, - is_static, attrs.is_inline, attrs.is_explicit, - is_attr_used, attrs.is_artificial); + clang_type, attrs.accessibility, attrs.isVirtual(), + is_static, attrs.isInline(), attrs.isExplicit(), + is_attr_used, attrs.isArtificial()); type_handled = cxx_method_decl != NULL; // Artificial methods are always handled even when we // don't create a new declaration for them. - type_handled |= attrs.is_artificial; + type_handled |= attrs.isArtificial(); if (cxx_method_decl) { LinkDeclContextToDIE(cxx_method_decl, die); @@ -1278,8 +1111,8 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, function_decl = m_ast.CreateFunctionDeclaration( ignore_containing_context ? m_ast.GetTranslationUnitDecl() : containing_decl_ctx, - GetOwningClangModule(die), name, clang_type, attrs.storage, - attrs.is_inline); + GetOwningClangModule(die), name, clang_type, attrs.isExternal() ? 
clang::SC_Extern : clang::SC_None, + attrs.isInline()); std::free(name_buf); if (has_template_params) { @@ -1289,7 +1122,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, ignore_containing_context ? m_ast.GetTranslationUnitDecl() : containing_decl_ctx, GetOwningClangModule(die), attrs.name.GetStringRef(), clang_type, - attrs.storage, attrs.is_inline); + attrs.isExternal() ? clang::SC_Extern : clang::SC_None, attrs.isInline()); clang::FunctionTemplateDecl *func_template_decl = m_ast.CreateFunctionTemplateDecl( containing_decl_ctx, GetOwningClangModule(die), @@ -1363,7 +1196,7 @@ TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, for (auto pos = array_info->element_orders.rbegin(); pos != end; ++pos) { num_elements = *pos; clang_type = m_ast.CreateArrayType(array_element_type, num_elements, - attrs.is_vector); + attrs.isVector()); array_element_type = clang_type; array_element_bit_stride = num_elements ? array_element_bit_stride * num_elements @@ -1371,7 +1204,7 @@ TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, } } else { clang_type = - m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); + m_ast.CreateArrayType(array_element_type, 0, attrs.isVector()); } ConstString empty_name; TypeSP type_sp = std::make_shared( @@ -1463,7 +1296,7 @@ void DWARFASTParserClang::ParseInheritance( break; case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); break; case DW_AT_virtuality: @@ -1524,40 +1357,6 @@ void DWARFASTParserClang::ParseInheritance( } } -TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( - const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) { - if (!type_sp) - return type_sp; - - SymbolFileDWARF *dwarf = die.GetDWARF(); - TypeList &type_list = dwarf->GetTypeList(); - DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); - dw_tag_t sc_parent_tag = sc_parent_die.Tag(); - - SymbolContextScope *symbol_context_scope = nullptr; - if (sc_parent_tag == DW_TAG_compile_unit || - sc_parent_tag == DW_TAG_partial_unit) { - symbol_context_scope = sc.comp_unit; - } else if (sc.function != nullptr && sc_parent_die) { - symbol_context_scope = - sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); - if (symbol_context_scope == nullptr) - symbol_context_scope = sc.function; - } else { - symbol_context_scope = sc.module_sp.get(); - } - - if (symbol_context_scope != nullptr) - type_sp->SetSymbolContextScope(symbol_context_scope); - - // We are ready to put this type into the uniqued list up at the module - // level. - type_list.Insert(type_sp); - - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - return type_sp; -} - TypeSP DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, const DWARFDIE &die, @@ -1631,12 +1430,12 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, // // Note that there is no DW_AT_declaration and there are no children, // and the byte size is zero. 
- attrs.is_forward_declaration = true; + attrs.setIsForwardDeclaration(); } if (attrs.class_language == eLanguageTypeObjC || attrs.class_language == eLanguageTypeObjC_plus_plus) { - if (!attrs.is_complete_objc_class && + if (!attrs.isObjCCompleteType() && die.Supports_DW_AT_APPLE_objc_complete_type()) { // We have a valid eSymbolTypeObjCClass class symbol whose name // matches the current objective C class that we are trying to find @@ -1677,7 +1476,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, } } - if (attrs.is_forward_declaration) { + if (attrs.isForwardDeclaration()) { // We have a forward declaration to a type and we need to try and // find a full declaration. We look in the current type index just in // case we have a forward declaration followed by an actual @@ -1796,7 +1595,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, clang_type = m_ast.CreateRecordType( decl_ctx, GetOwningClangModule(die), attrs.accessibility, attrs.name.GetCString(), tag_decl_kind, attrs.class_language, - &metadata, attrs.exports_symbols); + &metadata, attrs.isExportsSymbols()); } } @@ -1808,7 +1607,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, LLDB_INVALID_UID, Type::eEncodingIsUID, &attrs.decl, clang_type, Type::ResolveState::Forward, - TypePayloadClang(OptionalClangModuleID(), attrs.is_complete_objc_class)); + TypePayloadClang(OptionalClangModuleID(), attrs.isObjCCompleteType())); // Add our type to the unique type map so we don't end up creating many // copies of the same type over and over in the ASTContext for our @@ -1820,7 +1619,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename, *unique_ast_entry_up); - if (!attrs.is_forward_declaration) { + if (!attrs.isForwardDeclaration()) { // Always start the definition for a class type so that if the class // has child classes or types that require the class to be created // for use as their decl contexts the class will be ready to accept @@ -2532,7 +2331,7 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, break; case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); break; case DW_AT_artificial: is_artificial = form_value.Boolean(); @@ -3074,99 +2873,6 @@ size_t DWARFASTParserClang::ParseChildParameters( return arg_idx; } -llvm::Optional -DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, - const ExecutionContext *exe_ctx) { - SymbolFile::ArrayInfo array_info; - if (!parent_die) - return llvm::None; - - for (DWARFDIE die : parent_die.children()) { - const dw_tag_t tag = die.Tag(); - if (tag != DW_TAG_subrange_type) - continue; - - DWARFAttributes attributes; - const size_t num_child_attributes = die.GetAttributes(attributes); - if (num_child_attributes > 0) { - uint64_t num_elements = 0; - uint64_t lower_bound = 0; - uint64_t upper_bound = 0; - bool upper_bound_valid = false; - uint32_t i; - for (i = 0; i < num_child_attributes; ++i) { - const dw_attr_t attr = attributes.AttributeAtIndex(i); - DWARFFormValue form_value; - if (attributes.ExtractFormValueAtIndex(i, form_value)) { - switch (attr) { - case DW_AT_name: - break; - - case DW_AT_count: - if (DWARFDIE var_die = die.GetReferencedDIE(DW_AT_count)) { - if (var_die.Tag() == DW_TAG_variable) - if (exe_ctx) { - if (auto frame = exe_ctx->GetFrameSP()) { - 
Status error; - lldb::VariableSP var_sp; - auto valobj_sp = frame->GetValueForVariableExpressionPath( - var_die.GetName(), eNoDynamicValues, 0, var_sp, - error); - if (valobj_sp) { - num_elements = valobj_sp->GetValueAsUnsigned(0); - break; - } - } - } - } else - num_elements = form_value.Unsigned(); - break; - - case DW_AT_bit_stride: - array_info.bit_stride = form_value.Unsigned(); - break; - - case DW_AT_byte_stride: - array_info.byte_stride = form_value.Unsigned(); - break; - - case DW_AT_lower_bound: - lower_bound = form_value.Unsigned(); - break; - - case DW_AT_upper_bound: - upper_bound_valid = true; - upper_bound = form_value.Unsigned(); - break; - - default: - case DW_AT_abstract_origin: - case DW_AT_accessibility: - case DW_AT_allocated: - case DW_AT_associated: - case DW_AT_data_location: - case DW_AT_declaration: - case DW_AT_description: - case DW_AT_sibling: - case DW_AT_threads_scaled: - case DW_AT_type: - case DW_AT_visibility: - break; - } - } - } - - if (num_elements == 0) { - if (upper_bound_valid && upper_bound >= lower_bound) - num_elements = upper_bound - lower_bound + 1; - } - - array_info.element_orders.push_back(num_elements); - } - } - return array_info; -} - Type *DWARFASTParserClang::GetTypeForDIE(const DWARFDIE &die) { if (die) { SymbolFileDWARF *dwarf = die.GetDWARF(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index f97c0c470ab0..798779093dc2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -153,14 +153,6 @@ class DWARFASTParserClang : public DWARFASTParser { void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); - /// If \p type_sp is valid, calculate and set its symbol context scope, and - /// update the type list for its backing symbol file. - /// - /// Returns \p type_sp. - lldb::TypeSP - UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); - /// Follow Clang Module Skeleton CU references to find a type definition. lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, const DWARFDIE &die, @@ -252,39 +244,4 @@ class DWARFASTParserClang : public DWARFASTParser { lldb_private::ClangASTImporter::LayoutInfo &layout_info); }; -/// Parsed form of all attributes that are relevant for type reconstruction. 
-/// Some attributes are relevant for all kinds of types (declaration), while -/// others are only meaningful to a specific type (is_virtual) -struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); - - lldb::AccessType accessibility = lldb::eAccessNone; - bool is_artificial = false; - bool is_complete_objc_class = false; - bool is_explicit = false; - bool is_forward_declaration = false; - bool is_inline = false; - bool is_scoped_enum = false; - bool is_vector = false; - bool is_virtual = false; - bool is_objc_direct_call = false; - bool exports_symbols = false; - clang::StorageClass storage = clang::SC_None; - const char *mangled_name = nullptr; - lldb_private::ConstString name; - lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; - lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; - llvm::Optional byte_size; - size_t calling_convention = llvm::dwarf::DW_CC_normal; - uint32_t bit_stride = 0; - uint32_t byte_stride = 0; - uint32_t encoding = 0; -}; - #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERCLANG_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp new file mode 100644 index 000000000000..2a0f5ad8fc01 --- /dev/null +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp @@ -0,0 +1,183 @@ +//===-- DWARFASTParserD.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DWARFASTParserD.h" +#include "DWARFASTParserClang.h" +#include "DWARFASTParser.h" +#include "DWARFDebugInfo.h" +#include "DWARFDeclContext.h" +#include "DWARFDefines.h" +#include "SymbolFileDWARF.h" +#include "SymbolFileDWARFDebugMap.h" +#include "SymbolFileDWARFDwo.h" +#include "UniqueDWARFASTType.h" + +#include "Plugins/TypeSystem/D/TypeSystemD.h" + +using namespace lldb_private; +using namespace lldb; + +DWARFASTParserD::DWARFASTParserD(TypeSystemD &ast) + : m_ast(ast) {} + +TypeSP DWARFASTParserD::ParseTypeFromDWARF(const SymbolContext &sc, + const DWARFDIE &die, + bool *type_is_new_ptr) +{ + if (type_is_new_ptr) + *type_is_new_ptr = false; + + if (!die) + return nullptr; + + // TODO: Add logging + + SymbolFileDWARF *dwarf = die.GetDWARF(); + + Type *type_ptr = dwarf->GetDIEToType().lookup(die.GetDIE()); + if (type_ptr == DIE_IS_BEING_PARSED) + return nullptr; + + if (type_ptr) + return type_ptr->shared_from_this(); + // Set a bit that lets us know that we are currently parsing this + dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED; + + ParsedDWARFTypeAttributes attrs(die); + + if (DWARFDIE signature_die = attrs.signature.Reference()) { + if (TypeSP type_sp = + ParseTypeFromDWARF(sc, signature_die, type_is_new_ptr)) { + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; + } + return nullptr; + } + + if (type_is_new_ptr) + *type_is_new_ptr = true; + + const dw_tag_t tag = die.Tag(); + + TypeSP type_sp; + switch(tag) { + case DW_TAG_base_type: + type_sp = ParseSimpleType(sc, die, attrs); + break; + case DW_TAG_typedef: + type_sp = ParseDerivedType(sc, die, attrs); + break; + case 
DW_TAG_pointer_type:
+  case DW_TAG_reference_type:
+  case DW_TAG_rvalue_reference_type:
+  case DW_TAG_const_type:
+  case DW_TAG_restrict_type:
+  case DW_TAG_volatile_type:
+  case DW_TAG_atomic_type:
+  case DW_TAG_unspecified_type:
+    // Not handled by the D parser yet; hand back an empty type.
+    return TypeSP();
+
+  default: break;
+  }
+
+  // Hook the new type up to its symbol context scope and the module type list.
+  return UpdateSymbolContextScopeForType(sc, die, type_sp);
+}
+
+lldb::TypeSP
+DWARFASTParserD::ParseSimpleType(const lldb_private::SymbolContext &sc,
+                                 const DWARFDIE &die,
+                                 ParsedDWARFTypeAttributes &attrs)
+{
+  SymbolFileDWARF *dwarf = die.GetDWARF();
+  CompilerType cp_type;
+
+  cp_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
+      attrs.name.GetStringRef(),
+      attrs.encoding, attrs.byte_size.getValueOr(0) * 8);
+
+  return std::make_shared<Type>(
+      die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr,
+      dwarf->GetUID(attrs.type.Reference()), Type::eEncodingIsUID, &attrs.decl,
+      cp_type, Type::ResolveState::Full);
+}
+
+
+TypeSP
+DWARFASTParserD::ParseTypeFromModule(const lldb_private::SymbolContext &sc,
+                                     const DWARFDIE &die) {
+  return TypeSP();
+}
+
+lldb::TypeSP
+DWARFASTParserD::ParseDerivedType(const lldb_private::SymbolContext &sc,
+                                  const DWARFDIE &die,
+                                  ParsedDWARFTypeAttributes &attrs) {
+  SymbolFileDWARF *dwarf = die.GetDWARF();
+  const dw_tag_t tag = die.Tag();
+  CompilerType cp_type;
+  Type::ResolveState resolve_state = Type::ResolveState::Unresolved;
+
+  switch (tag) {
+  case DW_TAG_typedef: {
+    const DWARFDIE encoding_die = attrs.type.Reference();
+    if (encoding_die &&
+        encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) {
+      TypeSP type_sp = ParseTypeFromModule(sc, die);
+      if (type_sp)
+        return type_sp;
+    }
+
+    break;
+  }
+  default:
+    break;
+  }
+
+  return std::make_shared<Type>(
+      die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr,
+      dwarf->GetUID(attrs.type.Reference()), GetEncodingFromDWARFTypeTag(tag),
+      &attrs.decl, cp_type, resolve_state);
+}
+
+Function *
+DWARFASTParserD::ParseFunctionFromDWARF(CompileUnit &comp_unit,
+                                        const DWARFDIE &die,
+                                        const AddressRange &range)
+{
+  return nullptr;
+}
+
+bool
+DWARFASTParserD::CompleteTypeFromDWARF(const DWARFDIE &die, Type *type,
+                                       lldb_private::CompilerType &compiler_type)
+{
+  return false;
+}
+
+CompilerDecl
+DWARFASTParserD::GetDeclForUIDFromDWARF(const DWARFDIE &die)
+{
+  return CompilerDecl();
+}
+
+CompilerDeclContext
+DWARFASTParserD::GetDeclContextForUIDFromDWARF(const DWARFDIE &die)
+{
+  return CompilerDeclContext();
+}
+
+CompilerDeclContext
+DWARFASTParserD::GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die)
+{
+  return CompilerDeclContext();
+}
+
+void DWARFASTParserD::EnsureAllDIEsInDeclContextHaveBeenParsed(
+    lldb_private::CompilerDeclContext decl_context) {}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h
new file mode 100644
index 000000000000..8e84ce7417f7
--- /dev/null
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h
@@ -0,0 +1,66 @@
+//===-- DWARFASTParserD.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERD_H +#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERD_H + +#include "DWARFASTParser.h" +#include "DWARFDefines.h" +#include "lldb/Core/PluginInterface.h" +#include "lldb/Symbol/CompilerDecl.h" +#include "lldb/Symbol/CompilerDeclContext.h" +#include "lldb/Symbol/SymbolFile.h" + +namespace lldb_private { + class TypeSystemD; +} // namespace lldb_private + +class DWARFASTParserD : public DWARFASTParser { +public: + DWARFASTParserD(lldb_private::TypeSystemD &ast); + ~DWARFASTParserD() override = default; + + lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + bool *type_is_new_ptr) override; + + lldb_private::Function * + ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, + const DWARFDIE &die, + const lldb_private::AddressRange &range) override; + + bool + CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + lldb_private::CompilerType &compiler_type) override; + + lldb_private::CompilerDecl + GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + + lldb_private::CompilerDeclContext + GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + + lldb_private::CompilerDeclContext + GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + + void EnsureAllDIEsInDeclContextHaveBeenParsed( + lldb_private::CompilerDeclContext decl_context) override; +private: + lldb::TypeSP ParseSimpleType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + + lldb::TypeSP ParseDerivedType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseTypeFromModule(const lldb_private::SymbolContext &sc, + const DWARFDIE &die); + + lldb_private::TypeSystemD &m_ast; +}; + +#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERD_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index a31ee861179c..d4d9383a2ff7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -9,8 +9,11 @@ #ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H +#include "DWARFDIE.h" #include "DWARFDefines.h" #include "DWARFFormValue.h" +#include "lldb/Core/Declaration.h" +#include "lldb/Utility/ConstString.h" #include "llvm/ADT/SmallVector.h" #include diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index cece29dcf9ac..71d4c1e6c52f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -153,7 +153,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFAbbreviationDeclarationSet *GetAbbreviations() const; dw_offset_t GetAbbrevOffset() const; uint8_t GetAddressByteSize() const { return m_header.GetAddressByteSize(); } - dw_addr_t GetAddrBase() const { return m_addr_base ? 
*m_addr_base : 0; } + dw_addr_t GetAddrBase() const { return m_addr_base.getValueOr(0); } dw_addr_t GetBaseAddress() const { return m_base_addr; } dw_offset_t GetLineTableOffset(); dw_addr_t GetRangesBase() const { return m_ranges_base; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index e81ce28cb86e..80a38f77c2ca 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -75,7 +75,9 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; + friend class DWARFASTParser; friend class DWARFASTParserClang; + friend class DWARFASTParserD; // Static Functions static void Initialize(); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 7ce2f1451580..9473befa6cc3 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -1280,15 +1280,15 @@ clang::QualType PdbAstBuilder::CreateFunctionType( } static bool isTagDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } static bool isFunctionDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } static bool isBlockDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } void PdbAstBuilder::ParseAllNamespacesPlusChildrenOf( diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index bf101ac1acf1..e859b1d5a86c 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -900,7 +900,7 @@ lldb::LanguageType SymbolFileNativePDB::ParseLanguage(CompileUnit &comp_unit) { return TranslateLanguage(item->m_compile_opts->getLanguage()); } -void SymbolFileNativePDB::AddSymbols(Symtab &symtab) { return; } +void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {} size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) { std::lock_guard guard(GetModuleMutex()); diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index db0ae241be7e..a40b6ec9a635 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -239,7 +239,6 @@ void SymbolFilePDB::GetCompileUnitIndex( } } index = UINT32_MAX; - return; } std::unique_ptr @@ -402,7 +401,7 @@ static size_t ParseFunctionBlocksForPDBSymbol( block = parent_block; else break; - } else if (llvm::dyn_cast(pdb_symbol)) { + } else if (llvm::isa(pdb_symbol)) { auto uid = pdb_symbol->getSymIndexId(); if (parent_block->FindBlockByID(uid)) break; diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp index 919cdf46a5c0..a72e46a0b703 100644 --- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp +++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp @@ -73,8 +73,7 @@ bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command, if (thread == nullptr) { const uint32_t num_threads = 
process->GetThreadList().GetSize(); - size_t tid = m_options.m_thread_index ? *m_options.m_thread_index - : LLDB_INVALID_THREAD_ID; + size_t tid = m_options.m_thread_index.getValueOr(LLDB_INVALID_THREAD_ID); result.AppendErrorWithFormatv( "Thread index {0} is out of range (valid values are 1 - {1}).\n", tid, num_threads); diff --git a/lldb/source/Plugins/TypeSystem/CMakeLists.txt b/lldb/source/Plugins/TypeSystem/CMakeLists.txt index 17c40aee44cc..bdd55515a5f8 100644 --- a/lldb/source/Plugins/TypeSystem/CMakeLists.txt +++ b/lldb/source/Plugins/TypeSystem/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(Clang) +add_subdirectory(D) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 0df95594eea2..3a3e8681c5ed 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -106,8 +106,6 @@ TypeSystemClangSupportsLanguage(lldb::LanguageType language) { // Use Clang for Rust until there is a proper language plugin for it language == eLanguageTypeRust || language == eLanguageTypeExtRenderScript || - // Use Clang for D until there is a proper language plugin for it - language == eLanguageTypeD || // Open Dylan compiler debug info is designed to be Clang-compatible language == eLanguageTypeDylan; } @@ -1999,6 +1997,8 @@ TypeSystemClang::GetOpaqueCompilerType(clang::ASTContext *ast, return ast->getSignedWCharType().getAsOpaquePtr(); case eBasicTypeUnsignedWChar: return ast->getUnsignedWCharType().getAsOpaquePtr(); + case eBasicTypeChar8: + return ast->Char8Ty.getAsOpaquePtr(); case eBasicTypeChar16: return ast->Char16Ty.getAsOpaquePtr(); case eBasicTypeChar32: @@ -5149,6 +5149,8 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { case clang::BuiltinType::UChar: case clang::BuiltinType::WChar_U: return lldb::eFormatChar; + case clang::BuiltinType::Char8: + return lldb::eFormatUnicode8; case clang::BuiltinType::Char16: return lldb::eFormatUnicode16; case clang::BuiltinType::Char32: @@ -5442,6 +5444,8 @@ TypeSystemClang::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) { return eBasicTypeSignedChar; case clang::BuiltinType::Char_U: return eBasicTypeUnsignedChar; + case clang::BuiltinType::Char8: + return eBasicTypeChar8; case clang::BuiltinType::Char16: return eBasicTypeChar16; case clang::BuiltinType::Char32: @@ -8957,6 +8961,7 @@ bool TypeSystemClang::DumpTypeValue( case eFormatCharPrintable: case eFormatCharArray: case eFormatBytes: + case eFormatUnicode8: case eFormatBytesWithASCII: item_count = byte_size; byte_size = 1; diff --git a/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt b/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt new file mode 100644 index 000000000000..19967fb6764d --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt @@ -0,0 +1,16 @@ +add_lldb_library(lldbPluginTypeSystemD PLUGIN + TypeSystemD.cpp + DType.cpp + + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbUtility + lldbPluginExpressionParserClang + lldbPluginSymbolFileDWARF + lldbPluginSymbolFilePDB + lldbPluginObjCRuntime + LINK_COMPONENTS + Support +) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp new file mode 100644 index 000000000000..0ed150fb3a9b --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -0,0 +1,276 @@ +//===-- DType.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DType.h" +#include "lldb/lldb-enumerations.h" +#include "llvm/ADT/Triple.h" + +bool DType::IsBuiltIn() const { + return m_kind & eDTypeKindBuiltin; +} + +lldb::Format DType::GetFormat() const { + switch (m_kind) { + case eDTypeKindBool: + return lldb::eFormatBoolean; + case eDTypeKindChar: + return lldb::eFormatUnicode8; + case eDTypeKindWChar: + return lldb::eFormatUnicode16; + case eDTypeKindDChar: + return lldb::eFormatUnicode32; + case eDTypeKindUShort: + case eDTypeKindUInt: + case eDTypeKindULong: + case eDTypeKindUCent: + return lldb::eFormatUnsigned; + case eDTypeKindShort: + case eDTypeKindInt: + case eDTypeKindLong: + case eDTypeKindCent: + return lldb::eFormatDecimal; + case eDTypeKindFloat: + case eDTypeKindDouble: + case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: + return lldb::eFormatFloat; + case eDTypeKindVoid: + return lldb::eFormatVoid; + default: + return lldb::eFormatBytes; + } +} + +lldb::Encoding DType::GetEncoding(uint64_t &count) const { + count = 1; + switch (m_kind) { + case eDTypeKindByte: + case eDTypeKindShort: + case eDTypeKindInt: + case eDTypeKindLong: + case eDTypeKindCent: + return lldb::eEncodingSint; + case eDTypeKindBool: + case eDTypeKindUByte: + case eDTypeKindUShort: + case eDTypeKindUInt: + case eDTypeKindULong: + case eDTypeKindUCent: + case eDTypeKindChar: + case eDTypeKindWChar: + case eDTypeKindDChar: + return lldb::eEncodingUint; + case eDTypeKindFloat: + case eDTypeKindDouble: + case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: + return lldb::eEncodingIEEE754; + default: + count = 0; + return lldb::eEncodingInvalid; + } +} + +lldb::BasicType DType::GetBasicType() const { + if(!IsBuiltIn()) + return lldb::eBasicTypeOther; + + switch (m_kind) { + case eDTypeKindVoid: + return lldb::eBasicTypeVoid; + case eDTypeKindBool: + return lldb::eBasicTypeBool; + case eDTypeKindByte: + return lldb::eBasicTypeSignedChar; + case eDTypeKindUByte: + return lldb::eBasicTypeUnsignedChar; + case eDTypeKindShort: + return lldb::eBasicTypeShort; + case eDTypeKindUShort: + return lldb::eBasicTypeUnsignedShort; + case eDTypeKindInt: + return lldb::eBasicTypeInt; + case eDTypeKindUInt: + return lldb::eBasicTypeUnsignedInt; + case eDTypeKindLong: + return lldb::eBasicTypeLong; + case eDTypeKindULong: + return lldb::eBasicTypeUnsignedLong; + case eDTypeKindCent: + return lldb::eBasicTypeInt128; + case eDTypeKindUCent: + return lldb::eBasicTypeUnsignedInt128; + case eDTypeKindChar: + return lldb::eBasicTypeChar8; + case eDTypeKindWChar: + return lldb::eBasicTypeChar16; + case eDTypeKindDChar: + return lldb::eBasicTypeChar32; + case eDTypeKindFloat: + return lldb::eBasicTypeFloat; + case eDTypeKindDouble: + return lldb::eBasicTypeDouble; + case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: + return lldb::eBasicTypeLongDouble; + case eDTypeKindCFloat: + return lldb::eBasicTypeFloatComplex; + case eDTypeKindCDouble: + return lldb::eBasicTypeDoubleComplex; + case eDTypeKindCReal: + case eDTypeKindCReal64: + case eDTypeKindCReal80: + case eDTypeKindCReal128: + return lldb::eBasicTypeLongDoubleComplex; + default: + return lldb::eBasicTypeOther; + } +} + +static uint64_t 
GetRealBitSize(llvm::Triple &target_triple) { + if (target_triple.isX86() && !target_triple.isWindowsMSVCEnvironment() && !target_triple.isAndroid()) + // Assume x87 support: x87 FPU register size + return 80; + + if ((target_triple.isAArch64() && !target_triple.isOSDarwin()) || + (target_triple.isAndroid() && target_triple.getArch() == llvm::Triple::x86_64)) + // Assume 128-bit quadruple precision + return 128; + + // at least size of a double type kind + return 64; +} + +llvm::Optional DType::GetBitSize(llvm::Triple &target_triple) const { + return DType::GetBitSize(m_kind, target_triple); +} + +llvm::Optional DType::GetBitSize(DTypeKind kind, llvm::Triple &target_triple) { + switch(kind) + { + case eDTypeKindPtr: + if (target_triple.isArch64Bit()) + return 64; + if (target_triple.isArch32Bit()) + return 32; + if (target_triple.isArch16Bit()) + return 16; + + // unknown arch bit size + return llvm::None; + case eDTypeKindBool: + case eDTypeKindByte: + case eDTypeKindUByte: + case eDTypeKindChar: + return 8; + case eDTypeKindShort: + case eDTypeKindUShort: + case eDTypeKindWChar: + return 16; + case eDTypeKindInt: + case eDTypeKindUInt: + case eDTypeKindDChar: + case eDTypeKindFloat: + return 32; + case eDTypeKindLong: + case eDTypeKindULong: + case eDTypeKindDouble: + case eDTypeKindCFloat: + case eDTypeKindReal64: + return 64; + case eDTypeKindReal80: + return 80; + case eDTypeKindCent: + case eDTypeKindUCent: + case eDTypeKindCDouble: + case eDTypeKindReal128: + case eDTypeKindCReal64: + return 128; + case eDTypeKindCReal80: + return 160; + case eDTypeKindCReal128: + return 256; + case eDTypeKindReal: + return GetRealBitSize(target_triple); + case eDTypeKindCReal: + return GetRealBitSize(target_triple) * 2; + default: + return llvm::None; + } +} + +ConstString DType::GetName(DTypeKind kind) +{ + switch(kind) + { + case eDTypeKindPtr: + return ConstString("void*"); + case eDTypeKindVoid: + return ConstString("void"); + case eDTypeKindBool: + return ConstString("bool"); + case eDTypeKindByte: + return ConstString("byte"); + case eDTypeKindUByte: + return ConstString("ubyte"); + case eDTypeKindShort: + return ConstString("short"); + case eDTypeKindUShort: + return ConstString("ushort"); + case eDTypeKindInt: + return ConstString("int"); + case eDTypeKindUInt: + return ConstString("uint"); + case eDTypeKindLong: + return ConstString("long"); + case eDTypeKindULong: + return ConstString("ulong"); + case eDTypeKindCent: + return ConstString("cent"); + case eDTypeKindUCent: + return ConstString("ucent"); + case eDTypeKindChar: + return ConstString("char"); + case eDTypeKindWChar: + return ConstString("wchar"); + case eDTypeKindDChar: + return ConstString("dchar"); + case eDTypeKindFloat: + return ConstString("float"); + case eDTypeKindDouble: + return ConstString("double"); + case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: + return ConstString("real"); + case eDTypeKindIFloat: + return ConstString("ifloat"); + case eDTypeKindIDouble: + return ConstString("idouble"); + case eDTypeKindIReal: + return ConstString("ireal"); + case eDTypeKindCFloat: + return ConstString("cfloat"); + case eDTypeKindCDouble: + return ConstString("cdouble"); + case eDTypeKindCReal: + case eDTypeKindCReal64: + case eDTypeKindCReal80: + case eDTypeKindCReal128: + return ConstString("creal"); + default: + return ConstString(); + } +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h new file mode 100644 index 
000000000000..9d4441f00a07 --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -0,0 +1,81 @@ +//===-- DType.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H +#define LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H + +#include "lldb/Utility/ConstString.h" +#include "lldb/lldb-enumerations.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Triple.h" + +enum DTypeKind : uint8_t { + eDTypeKindInvalid = 0, + eDTypeKindVoid, + eDTypeKindPtr, + eDTypeKindSlice, + + eDTypeKindBuiltin = 1 << 7, // 128 + eDTypeKindBool = eDTypeKindBuiltin, + eDTypeKindByte, + eDTypeKindUByte, + eDTypeKindShort, + eDTypeKindUShort, + eDTypeKindInt, + eDTypeKindUInt, + eDTypeKindLong, + eDTypeKindULong, + eDTypeKindCent, + eDTypeKindUCent, + eDTypeKindChar, + eDTypeKindWChar, + eDTypeKindDChar, + eDTypeKindFloat, + eDTypeKindDouble, + eDTypeKindReal, + eDTypeKindReal64, + eDTypeKindReal80, + eDTypeKindReal128, + eDTypeKindIFloat, + eDTypeKindIDouble, + eDTypeKindIReal, + eDTypeKindCFloat, + eDTypeKindCDouble, + eDTypeKindCReal, + eDTypeKindCReal64, + eDTypeKindCReal80, + eDTypeKindCReal128, + eDTypeKindMax, +}; + +using namespace lldb_private; + +class DType { +public: + DType(DTypeKind kind, const ConstString &name) : m_kind(kind), m_name(name) {} + virtual ~DType() = default; + + DTypeKind GetKind() { return m_kind; } + const ConstString &GetName() const { return m_name; } + static ConstString GetName(DTypeKind); + + bool IsBuiltIn() const; + + lldb::Format GetFormat() const; + lldb::Encoding GetEncoding(uint64_t &count) const; + lldb::BasicType GetBasicType() const; + + static llvm::Optional GetBitSize(DTypeKind kind, llvm::Triple &target_triple); + llvm::Optional GetBitSize(llvm::Triple &target_triple) const; + +private: + DTypeKind m_kind; + ConstString m_name; +}; + +#endif // LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp new file mode 100644 index 000000000000..45ada00cecac --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -0,0 +1,805 @@ +#include "TypeSystemD.h" +#include "lldb/lldb-enumerations.h" + +#include "lldb/Core/DumpDataExtractor.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/StreamFile.h" +#include "lldb/Symbol/Type.h" +#include "lldb/Utility/ArchSpec.h" +#include "llvm/ADT/StringSwitch.h" + +LLDB_PLUGIN_DEFINE(TypeSystemD) + +namespace lldb_private { + +// LLVM RTTI support +char TypeSystemD::ID; + +TypeSystemD::TypeSystemD(llvm::Triple target_triple) : m_target_triple(target_triple) {} + +TypeSystemD::~TypeSystemD() { Finalize(); } +void TypeSystemD::Finalize() {} + +void TypeSystemD::Initialize() { + PluginManager::RegisterPlugin( + GetPluginNameStatic(), "D base AST context plug-in", CreateInstance, + GetSupportedLanguages(), GetSupportedLanguages()); +} + +void TypeSystemD::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +LanguageSet TypeSystemD::GetSupportedLanguages() { + LanguageSet languages; + languages.Insert(lldb::eLanguageTypeD); + return languages; +} + +lldb::LanguageType 
+TypeSystemD::GetMinimumLanguage(lldb::opaque_compiler_type_t /*unused*/) { + return lldb::eLanguageTypeD; +} + +bool TypeSystemD::SupportsLanguage(lldb::LanguageType language) { + return language == lldb::eLanguageTypeD; +} + +lldb::TypeSystemSP TypeSystemD::CreateInstance(lldb::LanguageType language, + Module *module, Target *target) { + if (language != lldb::eLanguageTypeD) + return lldb::TypeSystemSP(); + + ArchSpec arch; + if (module) + arch = module->GetArchitecture(); + else if (target) + arch = target->GetArchitecture(); + + if (!arch.IsValid()) + return lldb::TypeSystemSP(); + + llvm::Triple triple = arch.GetTriple(); + // LLVM wants this to be set to iOS or MacOSX; if we're working on + // a bare-boards type image, change the triple for llvm's benefit. + if (triple.getVendor() == llvm::Triple::Apple && + triple.getOS() == llvm::Triple::UnknownOS) { + if (triple.getArch() == llvm::Triple::arm || + triple.getArch() == llvm::Triple::aarch64 || + triple.getArch() == llvm::Triple::aarch64_32 || + triple.getArch() == llvm::Triple::thumb) { + triple.setOS(llvm::Triple::IOS); + } else { + triple.setOS(llvm::Triple::MacOSX); + } + } + + if (module) + return std::shared_ptr(new TypeSystemD(triple)); + + return lldb::TypeSystemSP(); +} + +DWARFASTParser *TypeSystemD::GetDWARFParser() { + if (!m_dwarf_ast_parser_up) + m_dwarf_ast_parser_up = std::make_unique(*this); + return m_dwarf_ast_parser_up.get(); +} + +#ifndef NDEBUG +LLVM_DUMP_METHOD void +TypeSystemD::dump(lldb::opaque_compiler_type_t type) const {} + +bool TypeSystemD::Verify(lldb::opaque_compiler_type_t type) { return true; } +#endif + +void TypeSystemD::Dump(llvm::raw_ostream &output) {} + +void TypeSystemD::DumpValue(lldb::opaque_compiler_type_t type, + ExecutionContext *exe_ctx, Stream *s, + lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size, + uint32_t bitfield_bit_size, + uint32_t bitfield_bit_offset, bool show_types, + bool show_summary, bool verbose, uint32_t depth) { + if (!type) + return; + DType *dtype = static_cast(type); + + switch(dtype->GetKind()) + { + default: + // We are down to a scalar type that we just need to display. 
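For orientation, the triple handling above can be exercised outside of LLDB. The following is a minimal standalone sketch, not part of this patch, that mirrors the Apple unknown-OS fix-up done in TypeSystemD::CreateInstance and the architecture-width queries DType::GetBitSize uses for eDTypeKindPtr; it assumes LLVM's Support headers and library are available to build against.

#include "llvm/ADT/Triple.h"
#include <cstdio>

// Mirrors the normalization in TypeSystemD::CreateInstance: a triple with an
// Apple vendor but an unknown OS is rewritten to iOS or macOS for LLVM's
// benefit.
static llvm::Triple Normalize(llvm::Triple triple) {
  if (triple.getVendor() == llvm::Triple::Apple &&
      triple.getOS() == llvm::Triple::UnknownOS) {
    if (triple.getArch() == llvm::Triple::arm ||
        triple.getArch() == llvm::Triple::aarch64 ||
        triple.getArch() == llvm::Triple::aarch64_32 ||
        triple.getArch() == llvm::Triple::thumb)
      triple.setOS(llvm::Triple::IOS);
    else
      triple.setOS(llvm::Triple::MacOSX);
  }
  return triple;
}

int main() {
  llvm::Triple triple = Normalize(llvm::Triple("arm64-apple-unknown"));
  // The same triple later feeds DType::GetBitSize; for eDTypeKindPtr it is
  // queried with isArch64Bit()/isArch32Bit()/isArch16Bit().
  unsigned ptr_bits = triple.isArch64Bit() ? 64
                      : triple.isArch32Bit() ? 32
                                             : 16; // simplified fallback
  std::printf("%s: void* is %u bits\n", triple.str().c_str(), ptr_bits);
  return 0;
}

Back in DumpValue, the scalar dump referred to by the comment above comes next.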
+ DumpDataExtractor(data, s, data_offset, format, data_byte_size, 1, + UINT32_MAX, LLDB_INVALID_ADDRESS, bitfield_bit_size, + bitfield_bit_offset); + + if (show_summary) + DumpSummary(type, exe_ctx, s, data, data_offset, data_byte_size); + } +} + +bool TypeSystemD::DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s, + lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, + size_t data_byte_size, + uint32_t bitfield_bit_size, + uint32_t bitfield_bit_offset, + ExecutionContextScope *exe_scope) { + if (!type) + return false; + + uint32_t item_count = 1; + return DumpDataExtractor(data, s, data_offset, format, data_byte_size, + item_count, UINT32_MAX, LLDB_INVALID_ADDRESS, + bitfield_bit_size, bitfield_bit_offset, exe_scope); +} + +void TypeSystemD::DumpSummary(lldb::opaque_compiler_type_t type, + ExecutionContext *exe_ctx, Stream *s, + const DataExtractor &data, + lldb::offset_t data_offset, + size_t data_byte_size) {} + +void TypeSystemD::DumpTypeDescription(lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level) { + StreamFile s(stdout, false); + DumpTypeDescription(type, &s, level); +} + +void TypeSystemD::DumpTypeDescription(lldb::opaque_compiler_type_t type, + Stream *s, lldb::DescriptionLevel level) { +} + +bool TypeSystemD::IsVectorType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size) { + return false; +} + +uint32_t TypeSystemD::IsHomogeneousAggregate(lldb::opaque_compiler_type_t type, + CompilerType *base_type_ptr) { + return 0; +} + +bool TypeSystemD::IsReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type, bool *is_rvalue) { + return false; +} + +bool TypeSystemD::IsPossibleDynamicType( + lldb::opaque_compiler_type_t type, + CompilerType *target_type, // Can pass nullptr + bool check_cplusplus, bool check_objc) { + return false; +} + +bool TypeSystemD::IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsPointerType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) { + return false; +} + +bool TypeSystemD::IsPointerOrReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) { + return false; +} +bool TypeSystemD::IsArrayType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size, + bool *is_incomplete) { + return false; +} + +bool TypeSystemD::IsAggregateType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsAnonymousType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsBeingDefined(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsCharType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsCompleteType(lldb::opaque_compiler_type_t type) { + if (!type) + return false; + return true; +} + +bool TypeSystemD::IsConst(lldb::opaque_compiler_type_t type) { return false; } + +bool TypeSystemD::IsCStringType(lldb::opaque_compiler_type_t type, + uint32_t &length) { + return false; +} + +bool TypeSystemD::IsTypedefType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsPolymorphicClass(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsFunctionPointerType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsBlockPointerType(lldb::opaque_compiler_type_t type, + CompilerType *function_pointer_type_ptr) { + return false; +} + +bool TypeSystemD::IsIntegerType(lldb::opaque_compiler_type_t type, + 
bool &is_signed) { + return false; +} + +bool TypeSystemD::IsEnumerationType(lldb::opaque_compiler_type_t type, + bool &is_signed) { + return false; +} + +bool TypeSystemD::IsScopedEnumerationType(lldb::opaque_compiler_type_t type) { + return false; +} + +uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type, + CompilerType *base_type_ptr) { + return 0; +} + +bool TypeSystemD::IsScalarType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsFunctionType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsVoidType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsDefined(lldb::opaque_compiler_type_t type) { return false; } + +bool TypeSystemD::IsFloatingPointType(lldb::opaque_compiler_type_t type, + uint32_t &count, bool &is_complex) { + return false; +} + +bool TypeSystemD::CanPassInRegisters(const CompilerType &type) { return false; } + +void TypeSystemD::ForEachEnumerator( + lldb::opaque_compiler_type_t type, + std::function const &callback) {} + +CompilerType TypeSystemD::GetTypedefedType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, + size_t bit_size) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetBasicTypeFromAST(lldb::BasicType basic_type) { + return CompilerType(); +} + +llvm::Optional +TypeSystemD::GetBitSize(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) { + if(!GetCompleteType(type)) + return llvm::None; + + return static_cast(type)->GetBitSize(m_target_triple); +} + +lldb::Encoding TypeSystemD::GetEncoding(lldb::opaque_compiler_type_t type, + uint64_t &count) { + if (!type) { + count = 0; + return lldb::eEncodingInvalid; + } + + return static_cast(type)->GetEncoding(count); +} + +lldb::Format TypeSystemD::GetFormat(lldb::opaque_compiler_type_t type) { + if (!type) + return lldb::eFormatDefault; + + return static_cast(type)->GetFormat(); +} + +lldb::TypeClass TypeSystemD::GetTypeClass(lldb::opaque_compiler_type_t type) { + return lldb::eTypeClassInvalid; +} + +unsigned TypeSystemD::GetTypeQualifiers(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType TypeSystemD::GetTypeForDecl(void *opaque_decl) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetChildCompilerTypeAtIndex( + lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx, + bool transparent_pointers, bool omit_empty_base_classes, + bool ignore_array_bounds, std::string &child_name, + uint32_t &child_byte_size, int32_t &child_byte_offset, + uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset, + bool &child_is_base_class, bool &child_is_deref_of_parent, + ValueObject *valobj, uint64_t &language_flags) { + return CompilerType(); +} + +uint32_t TypeSystemD::GetNumFields(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType TypeSystemD::GetFieldAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, std::string &name, + uint64_t *bit_offset_ptr, + uint32_t *bitfield_bit_size_ptr, + bool *is_bitfield_ptr) { + return CompilerType(); +} + +uint32_t +TypeSystemD::GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) { + return 0; +} + +uint32_t +TypeSystemD::GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType +TypeSystemD::GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, uint32_t *bit_offset_ptr) { + return CompilerType(); +} + +CompilerType 
+TypeSystemD::GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, uint32_t *bit_offset_ptr) { + return CompilerType(); +} + +uint32_t TypeSystemD::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type, + const char *name, + bool omit_empty_base_classes) { + return 0; +} + +size_t TypeSystemD::GetIndexOfChildMemberWithName( + lldb::opaque_compiler_type_t type, const char *name, + bool omit_empty_base_classes, std::vector &child_indexes) { + return 0; +} + +size_t TypeSystemD::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { + return 0; +} + +lldb::TemplateArgumentKind +TypeSystemD::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, + size_t idx) { + return lldb::eTemplateArgumentKindNull; +} +CompilerType +TypeSystemD::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) { + return CompilerType(); +} +llvm::Optional +TypeSystemD::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) { + return llvm::None; +} + +CompilerType TypeSystemD::GetTypeForFormatters(void *type) { + if(!type) + return CompilerType(); + return CompilerType(this, type); +} + +llvm::Optional +TypeSystemD::GetTypeBitAlign(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) { + //TODO: Support types with different alignment + return 0; +} + +uint32_t TypeSystemD::GetNumChildren(lldb::opaque_compiler_type_t type, + bool omit_empty_base_classes, + const ExecutionContext *exe_ctx) { + return 0; +} + +CompilerType TypeSystemD::GetBuiltinTypeByName(ConstString name) { + return CompilerType(); +} + +lldb::BasicType +TypeSystemD::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) { + if (!type) + return lldb::eBasicTypeInvalid; + + return static_cast(type)->GetBasicType(); +} + +CompilerType +TypeSystemD::GetArrayElementType(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetArrayType(lldb::opaque_compiler_type_t type, + uint64_t size) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetCanonicalType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) { + return CompilerType(this, type); +} + +CompilerType +TypeSystemD::GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +// Returns -1 if this isn't a function of if the function doesn't have a +// prototype Returns a value >= 0 if there is a prototype. 
+int TypeSystemD::GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) { + return -1; +} + +CompilerType +TypeSystemD::GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetFunctionReturnType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +size_t TypeSystemD::GetNumMemberFunctions(lldb::opaque_compiler_type_t type) { + return 0; +} + +TypeMemberFunctionImpl +TypeSystemD::GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) { + return TypeMemberFunctionImpl(); +} + +CompilerType +TypeSystemD::GetNonReferenceType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetPointeeType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetPointerType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetLValueReferenceType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetRValueReferenceType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetAtomicType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::AddConstModifier(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::AddVolatileModifier(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::AddRestrictModifier(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +const llvm::fltSemantics &TypeSystemD::GetFloatTypeSemantics(size_t byte_size) { + return llvm::APFloatBase::Bogus(); +} + +bool TypeSystemD::GetCompleteType(lldb::opaque_compiler_type_t type) { + if (!type) + return false; + DType* dtype = static_cast(type); + if (dtype->IsBuiltIn()) + return true; + + //TODO: Implement this for other kinds + return false; +} + +ConstString TypeSystemD::GetTypeName(lldb::opaque_compiler_type_t type) { + if (type) + return static_cast(type)->GetName(); + return ConstString(); +} + +ConstString TypeSystemD::GetDisplayTypeName(lldb::opaque_compiler_type_t type) { + return GetTypeName(type); +} + +uint32_t +TypeSystemD::GetTypeInfo(lldb::opaque_compiler_type_t type, + CompilerType *pointee_or_element_compiler_type) { + if (!type) + return 0; + + DType* dtype = static_cast(type); + uint32_t typeinfo_flags = 0; + + if(dtype->IsBuiltIn()) + { + typeinfo_flags = lldb::eTypeIsBuiltIn | lldb::eTypeHasValue; + switch(dtype->GetKind()) + { + case eDTypeKindIFloat: + case eDTypeKindIDouble: + case eDTypeKindIReal: + case eDTypeKindCFloat: + case eDTypeKindCDouble: + case eDTypeKindCReal: + typeinfo_flags |= lldb::eTypeIsComplex; + LLVM_FALLTHROUGH; + case eDTypeKindFloat: + case eDTypeKindDouble: + case eDTypeKindReal: + typeinfo_flags |= lldb::eTypeIsFloat; + goto Lscalar; + case eDTypeKindByte: + case eDTypeKindShort: + case eDTypeKindInt: + case eDTypeKindLong: + case eDTypeKindCent: + typeinfo_flags |= lldb::eTypeIsSigned; + LLVM_FALLTHROUGH; + case eDTypeKindUByte: + case eDTypeKindUShort: + case eDTypeKindUInt: + case eDTypeKindULong: + case eDTypeKindUCent: + typeinfo_flags |= lldb::eTypeIsInteger; + LLVM_FALLTHROUGH; + case eDTypeKindBool: + case eDTypeKindChar: + case eDTypeKindWChar: + case eDTypeKindDChar: + Lscalar: + typeinfo_flags |= lldb::eTypeIsScalar; + break; + default: + break; + } + } + return typeinfo_flags; +} + +uint32_t 
TypeSystemD::GetPointerByteSize() { return 0; } + +size_t +TypeSystemD::GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType +TypeSystemD::GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type, + const size_t index) { + return CompilerType(); +} + +ConstString TypeSystemD::DeclGetName(void *opaque_decl) { + return ConstString(); +} + +std::vector +TypeSystemD::DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name, + const bool ignore_using_decls) { + return std::vector(); +} + +ConstString TypeSystemD::DeclContextGetName(void *opaque_decl_ctx) { + return ConstString(); +} + +ConstString +TypeSystemD::DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) { + return ConstString(); +} + +bool TypeSystemD::DeclContextIsClassMethod( + void *opaque_decl_ctx, lldb::LanguageType *language_ptr, + bool *is_instance_method_ptr, ConstString *language_object_name_ptr) { + return false; +} + +bool TypeSystemD::DeclContextIsContainedInLookup(void *opaque_decl_ctx, + void *other_opaque_decl_ctx) { + return false; +} + +CompilerType +TypeSystemD::CreateBaseType(DTypeKind kind, + const lldb_private::ConstString &name) { + DType *type = new DType(kind, name); + return CompilerType(this, type); +} + +CompilerType +TypeSystemD::CreateBaseType(DTypeKind kind) { + DType *type = new DType(kind, DType::GetName(kind)); + return CompilerType(this, type); +} + +CompilerType +TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(llvm::StringRef type_name, uint32_t dw_ate, uint32_t bit_size) +{ + auto IsMatch = [&](DTypeKind kind) { + llvm::Optional BS = DType::GetBitSize(kind, m_target_triple); + if (BS.hasValue()) + return BS.getValue() == bit_size; + + return false; + }; + + switch(dw_ate) + { + case DW_ATE_address: + if (IsMatch(eDTypeKindPtr)) + return CreateBaseType(eDTypeKindPtr); + break; + + case DW_ATE_boolean: + if (IsMatch(eDTypeKindBool)) + return CreateBaseType(eDTypeKindBool); + break; + + case DW_ATE_complex_float: + if (IsMatch(eDTypeKindCFloat)) + return CreateBaseType(eDTypeKindCFloat); + // check for creal and complex long double type name to be backward + // compatible with old DMD backend. + if (!type_name.empty() && (type_name == "creal" || type_name == "complex long double")) + { + if (IsMatch(eDTypeKindCReal80)) + return CreateBaseType(eDTypeKindCReal80); + if (IsMatch(eDTypeKindCReal128)) + return CreateBaseType(eDTypeKindCReal128); + if (IsMatch(eDTypeKindCReal64)) + return CreateBaseType(eDTypeKindCReal64); + // fallback to ABI-specific bit size + if (IsMatch(eDTypeKindCReal)) + return CreateBaseType(eDTypeKindCReal); + } + + if (IsMatch(eDTypeKindCDouble)) + return CreateBaseType(eDTypeKindCDouble); + break; + + case DW_ATE_float: + if (IsMatch(eDTypeKindFloat)) + return CreateBaseType(eDTypeKindFloat); + // check for real and long double type name to be backward compatible + // with old DMD backend. 
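Before that check, it is worth seeing how the IsMatch lambda interacts with the target-dependent size of D's real type. A hedged standalone sketch, not part of this patch, assuming LLVM's Support headers are available; realBitSize here mirrors GetRealBitSize from DType.cpp above:

#include "llvm/ADT/Triple.h"
#include <cstdio>

// Mirrors DType's GetRealBitSize: x87-capable x86 targets use an 80-bit real,
// most AArch64 targets (and Android x86_64) use 128-bit quadruple precision,
// and everything else falls back to 64 bits.
static unsigned realBitSize(const llvm::Triple &triple) {
  if (triple.isX86() && !triple.isWindowsMSVCEnvironment() &&
      !triple.isAndroid())
    return 80;
  if ((triple.isAArch64() && !triple.isOSDarwin()) ||
      (triple.isAndroid() && triple.getArch() == llvm::Triple::x86_64))
    return 128;
  return 64;
}

int main() {
  // A DW_ATE_float named "real" is matched against the fixed-size kinds first
  // (Real80/Real128/Real64) and finally against eDTypeKindReal, whose bit
  // size is the ABI-dependent value computed here.
  for (const char *spec : {"x86_64-unknown-linux-gnu",
                           "aarch64-unknown-linux-gnu",
                           "x86_64-pc-windows-msvc"}) {
    std::printf("%s: real is %u bits\n", spec, realBitSize(llvm::Triple(spec)));
  }
  return 0;
}

The backward-compatible name check itself follows.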
+ if (!type_name.empty() && (type_name == "real" || type_name == "long double")) + { + if (IsMatch(eDTypeKindReal80)) + return CreateBaseType(eDTypeKindReal80); + if (IsMatch(eDTypeKindReal128)) + return CreateBaseType(eDTypeKindReal128); + if (IsMatch(eDTypeKindReal64)) + return CreateBaseType(eDTypeKindReal64); + // fallback to ABI-specific bit size + if (IsMatch(eDTypeKindReal)) + return CreateBaseType(eDTypeKindReal); + } + + if (IsMatch(eDTypeKindDouble)) + return CreateBaseType(eDTypeKindDouble); + break; + + case DW_ATE_signed: + if (IsMatch(eDTypeKindByte)) + return CreateBaseType(eDTypeKindByte); + if (IsMatch(eDTypeKindShort)) + return CreateBaseType(eDTypeKindShort); + if (IsMatch(eDTypeKindInt)) + return CreateBaseType(eDTypeKindInt); + if (IsMatch(eDTypeKindLong)) + return CreateBaseType(eDTypeKindLong); + if (IsMatch(eDTypeKindCent)) + return CreateBaseType(eDTypeKindCent); + break; + case DW_ATE_signed_char: + if (IsMatch(eDTypeKindByte)) + return CreateBaseType(eDTypeKindByte); + break; + case DW_ATE_unsigned: + if (IsMatch(eDTypeKindUByte)) + return CreateBaseType(eDTypeKindUByte); + if (IsMatch(eDTypeKindUShort)) + return CreateBaseType(eDTypeKindUShort); + if (IsMatch(eDTypeKindUInt)) + return CreateBaseType(eDTypeKindUInt); + if (IsMatch(eDTypeKindULong)) + return CreateBaseType(eDTypeKindULong); + if (IsMatch(eDTypeKindUCent)) + return CreateBaseType(eDTypeKindUCent); + break; + case DW_ATE_unsigned_char: + if (IsMatch(eDTypeKindUByte)) + return CreateBaseType(eDTypeKindUByte); + break; + case DW_ATE_UTF: + switch(bit_size) { + case 8: // UTF-8 + return CreateBaseType(eDTypeKindChar); + case 16: // UTF-16 + return CreateBaseType(eDTypeKindWChar); + case 32: // UTF-32 + return CreateBaseType(eDTypeKindDChar); + } + if (!type_name.empty()) + { + // use a string switch with backward compatible type names + DTypeKind kind = llvm::StringSwitch(type_name) + .Cases("char8_t", "char", eDTypeKindChar) + .Cases("char16_t", "wchar_t", "wchar", eDTypeKindWChar) + .Cases("char32_t", "dchar", eDTypeKindDChar) + .Default(eDTypeKindInvalid); + + if (kind != eDTypeKindInvalid) + return CreateBaseType(kind); + } + break; + default: + break; + } + + // unknown suitable builtin type + return CompilerType(); +} + +} // namespace lldb_private diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h new file mode 100644 index 000000000000..efad41a45759 --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -0,0 +1,355 @@ +//===-- TypeSystemD.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H +#define LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H + +#include "DType.h" + +#include "lldb/Core/Module.h" +#include "lldb/Symbol/TypeSystem.h" +#include "lldb/Target/Target.h" +#include "lldb/lldb-enumerations.h" + +#include "Plugins/SymbolFile/DWARF/DWARFASTParserD.h" + +namespace lldb_private { + +class TypeSystemD : public TypeSystem { + friend DWARFASTParserD; + + // LLVM RTTI support + static char ID; + +public: + // llvm casting support + bool isA(const void *ClassID) const override { return ClassID == &ID; } + static bool classof(const TypeSystem *ts) { return ts->isA(&ID); } + + explicit TypeSystemD(llvm::Triple); + ~TypeSystemD() override; + + void Finalize() override; + + static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language, + Module *module, Target *target); + + // PluginInterface functions + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + static llvm::StringRef GetPluginNameStatic() { return "dlang"; } + + static LanguageSet GetSupportedLanguages(); + + static void Initialize(); + + static void Terminate(); + + DWARFASTParser *GetDWARFParser() override; + + // Dumping types +#ifndef NDEBUG + LLVM_DUMP_METHOD void dump(lldb::opaque_compiler_type_t type) const override; + + bool Verify(lldb::opaque_compiler_type_t type) override; +#endif + + /// \see lldb_private::TypeSystem::Dump + void Dump(llvm::raw_ostream &output) override; + + void DumpValue(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, + Stream *s, lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size, + uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, + bool show_types, bool show_summary, bool verbose, + uint32_t depth) override; + + bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s, + lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size, + uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, + ExecutionContextScope *exe_scope) override; + + void DumpSummary(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, + Stream *s, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size) override; + + void DumpTypeDescription( + lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; + + void DumpTypeDescription( + lldb::opaque_compiler_type_t type, Stream *s, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; + + ConstString DeclGetName(void *opaque_decl) override; + + std::vector + DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name, + const bool ignore_using_decls) override; + + ConstString DeclContextGetName(void *opaque_decl_ctx) override; + + ConstString DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) override; + + bool DeclContextIsClassMethod(void *opaque_decl_ctx, + lldb::LanguageType *language_ptr, + bool *is_instance_method_ptr, + ConstString *language_object_name_ptr) override; + + bool DeclContextIsContainedInLookup(void *opaque_decl_ctx, + void *other_opaque_decl_ctx) override; + + bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type, + CompilerType *target_type, // Can pass nullptr + bool check_cplusplus, bool check_objc) override; + + bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) 
override; + + bool IsPointerType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) override; + + bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) override; + + bool IsReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type, bool *is_rvalue) override; + + bool IsArrayType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size, + bool *is_incomplete) override; + + bool IsVectorType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size) override; + + bool IsAggregateType(lldb::opaque_compiler_type_t type) override; + + bool IsAnonymousType(lldb::opaque_compiler_type_t type) override; + + bool IsBeingDefined(lldb::opaque_compiler_type_t type) override; + + bool IsCharType(lldb::opaque_compiler_type_t type) override; + + bool IsCompleteType(lldb::opaque_compiler_type_t type) override; + + bool IsConst(lldb::opaque_compiler_type_t type) override; + + bool IsCStringType(lldb::opaque_compiler_type_t type, + uint32_t &length) override; + + bool IsTypedefType(lldb::opaque_compiler_type_t type) override; + + bool IsPolymorphicClass(lldb::opaque_compiler_type_t type) override; + + bool IsFunctionPointerType(lldb::opaque_compiler_type_t type) override; + + bool IsBlockPointerType(lldb::opaque_compiler_type_t type, + CompilerType *function_pointer_type_ptr) override; + + bool IsIntegerType(lldb::opaque_compiler_type_t type, + bool &is_signed) override; + + bool IsEnumerationType(lldb::opaque_compiler_type_t type, + bool &is_signed) override; + + bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) override; + + uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type, + CompilerType *base_type_ptr) override; + + bool IsScalarType(lldb::opaque_compiler_type_t type) override; + + bool IsFunctionType(lldb::opaque_compiler_type_t type) override; + + bool IsVoidType(lldb::opaque_compiler_type_t type) override; + + bool IsDefined(lldb::opaque_compiler_type_t type) override; + + bool IsFloatingPointType(lldb::opaque_compiler_type_t type, uint32_t &count, + bool &is_complex) override; + + bool CanPassInRegisters(const CompilerType &type) override; + + bool SupportsLanguage(lldb::LanguageType language) override; + + void ForEachEnumerator( + lldb::opaque_compiler_type_t type, + std::function const &callback) override; + + CompilerType GetTypedefedType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, + size_t bit_size) override; + + CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) override; + + llvm::Optional + GetBitSize(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) override; + + lldb::Encoding GetEncoding(lldb::opaque_compiler_type_t type, + uint64_t &count) override; + + lldb::Format GetFormat(lldb::opaque_compiler_type_t type) override; + + lldb::LanguageType + GetMinimumLanguage(lldb::opaque_compiler_type_t type) override; + + lldb::TypeClass GetTypeClass(lldb::opaque_compiler_type_t type) override; + + unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) override; + + CompilerType GetTypeForDecl(void *opaque_decl) override; + + CompilerType GetChildCompilerTypeAtIndex( + lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx, + bool transparent_pointers, bool omit_empty_base_classes, + bool ignore_array_bounds, std::string &child_name, + uint32_t &child_byte_size, int32_t &child_byte_offset, + uint32_t 
&child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset, + bool &child_is_base_class, bool &child_is_deref_of_parent, + ValueObject *valobj, uint64_t &language_flags) override; + + uint32_t GetNumFields(lldb::opaque_compiler_type_t type) override; + + CompilerType GetFieldAtIndex(lldb::opaque_compiler_type_t type, size_t idx, + std::string &name, uint64_t *bit_offset_ptr, + uint32_t *bitfield_bit_size_ptr, + bool *is_bitfield_ptr) override; + + uint32_t GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) override; + + uint32_t GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) override; + + CompilerType GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, + uint32_t *bit_offset_ptr) override; + + CompilerType GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, + uint32_t *bit_offset_ptr) override; + + uint32_t GetIndexOfChildWithName(lldb::opaque_compiler_type_t type, + const char *name, + bool omit_empty_base_classes) override; + + size_t + GetIndexOfChildMemberWithName(lldb::opaque_compiler_type_t type, + const char *name, bool omit_empty_base_classes, + std::vector &child_indexes) override; + + size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override; + + lldb::TemplateArgumentKind + GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, + size_t idx) override; + CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) override; + llvm::Optional + GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) override; + + CompilerType GetTypeForFormatters(void *type) override; + + llvm::Optional + GetTypeBitAlign(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) override; + + uint32_t GetNumChildren(lldb::opaque_compiler_type_t type, + bool omit_empty_base_classes, + const ExecutionContext *exe_ctx) override; + + CompilerType GetBuiltinTypeByName(ConstString name) override; + + lldb::BasicType + GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) override; + + CompilerType GetArrayElementType(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) override; + + CompilerType GetArrayType(lldb::opaque_compiler_type_t type, + uint64_t size) override; + + CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) override; + + // Returns -1 if this isn't a function of if the function doesn't have a + // prototype Returns a value >= 0 if there is a prototype. 
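Together with the LLVM-style RTTI members declared at the top of the class (ID, isA, classof), these overrides let generic LLDB code recover the D type system from a plain TypeSystem pointer. A minimal usage sketch, not taken from the patch; the helper name is hypothetical and it assumes compilation inside the LLDB source tree:

#include "Plugins/TypeSystem/D/TypeSystemD.h"
#include "llvm/Support/Casting.h"

// Hypothetical helper: returns the D type system if ts is one, else nullptr.
// classof()/isA() above are what make llvm::dyn_cast work on TypeSystem.
static lldb_private::TypeSystemD *AsDTypeSystem(lldb_private::TypeSystem *ts) {
  return llvm::dyn_cast_or_null<lldb_private::TypeSystemD>(ts);
}

The function-introspection overrides described by the comment above are declared next.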
+ int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) override; + + CompilerType GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) override; + + CompilerType + GetFunctionReturnType(lldb::opaque_compiler_type_t type) override; + + size_t GetNumMemberFunctions(lldb::opaque_compiler_type_t type) override; + + TypeMemberFunctionImpl + GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) override; + + CompilerType GetNonReferenceType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetPointeeType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetPointerType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetLValueReferenceType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetRValueReferenceType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetAtomicType(lldb::opaque_compiler_type_t type) override; + + CompilerType AddConstModifier(lldb::opaque_compiler_type_t type) override; + + CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type) override; + + CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type) override; + + const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) override; + + bool GetCompleteType(lldb::opaque_compiler_type_t type) override; + + ConstString GetTypeName(lldb::opaque_compiler_type_t type) override; + + ConstString GetDisplayTypeName(lldb::opaque_compiler_type_t type) override; + + uint32_t GetTypeInfo(lldb::opaque_compiler_type_t type, + CompilerType *pointee_or_element_compiler_type) override; + + uint32_t GetPointerByteSize() override; + + size_t + GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) override; + + CompilerType GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type, + const size_t index) override; + +private: + CompilerType CreateBaseType(DTypeKind kind, + const lldb_private::ConstString &name); + CompilerType CreateBaseType(DTypeKind kind); + + CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(llvm::StringRef type_name, uint32_t dw_ate, uint32_t bit_size); + + std::unique_ptr m_dwarf_ast_parser_up; + llvm::Triple m_target_triple; +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H diff --git a/lldb/source/Symbol/SymbolFile.cpp b/lldb/source/Symbol/SymbolFile.cpp index 557c55699137..b85901af4d67 100644 --- a/lldb/source/Symbol/SymbolFile.cpp +++ b/lldb/source/Symbol/SymbolFile.cpp @@ -125,9 +125,7 @@ void SymbolFile::FindFunctions(const RegularExpression ®ex, void SymbolFile::GetMangledNamesForFunction( const std::string &scope_qualified_name, - std::vector &mangled_names) { - return; -} + std::vector &mangled_names) {} void SymbolFile::FindTypes( ConstString name, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Target/ThreadPlanStack.cpp b/lldb/source/Target/ThreadPlanStack.cpp index f09583cc50cc..80634647f9e0 100644 --- a/lldb/source/Target/ThreadPlanStack.cpp +++ b/lldb/source/Target/ThreadPlanStack.cpp @@ -210,7 +210,6 @@ void ThreadPlanStack::DiscardAllPlans() { for (int i = stack_size - 1; i > 0; i--) { DiscardPlan(); } - return; } void ThreadPlanStack::DiscardConsultingControllingPlans() { diff --git a/lldb/source/Target/UnwindLLDB.cpp b/lldb/source/Target/UnwindLLDB.cpp index 047147112f3b..77dd19b04ebd 100644 --- a/lldb/source/Target/UnwindLLDB.cpp +++ b/lldb/source/Target/UnwindLLDB.cpp @@ -312,7 +312,6 @@ void UnwindLLDB::UpdateUnwindPlanForFirstFrameIfInvalid(ABI *abi) { // Restore 
status after calling AddOneMoreFrame m_unwind_complete = old_m_unwind_complete; m_candidate_frame = old_m_candidate_frame; - return; } bool UnwindLLDB::AddOneMoreFrame(ABI *abi) { diff --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py index c894b80228cf..7763305b58db 100644 --- a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py +++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py @@ -115,8 +115,7 @@ def test(self): self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character array", string_expr)) self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character", string_expr)) self.assertIn('= ..90zaZA....... \n', self.getFormatted("printable character", string_expr)) - # FIXME: This should probably print the characters in the uint128_t. - self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("unicode8", string_expr)) + self.assertIn('= 0x00 0x1b 0x39 0x30 0x7a 0x61 0x5a 0x41 0x0b 0x09 0x0d 0x0a 0x0c 0x08 0x07 0x20\n', self.getFormatted("unicode8", string_expr)) # OSType ostype_expr = "(__UINT64_TYPE__)0x" @@ -137,6 +136,9 @@ def test(self): # bytes with ASCII self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring")) + # unicode8 + self.assertIn('= 0x78 0x56 0x34 0x12\n', self.getFormatted("unicode8", "0x12345678")) + # unicode16 self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678")) diff --git a/lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py b/lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py new file mode 100644 index 000000000000..98c87461a608 --- /dev/null +++ b/lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py @@ -0,0 +1,44 @@ +import os +import struct +import subprocess + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class FreeBSDKernelVMCoreTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + mydir = TestBase.compute_mydir(__file__) + + def test_mem(self): + kernel_exec = "/boot/kernel/kernel" + mem_device = "/dev/mem" + + if not os.access(kernel_exec, os.R_OK): + self.skipTest("Kernel @ %s is not readable" % (kernel_exec,)) + if not os.access(mem_device, os.R_OK): + self.skipTest("Memory @ %s is not readable" % (mem_device,)) + + target = self.dbg.CreateTarget(kernel_exec) + process = target.LoadCore(mem_device) + hz_value = int(subprocess.check_output(["sysctl", "-n", "kern.hz"])) + + self.assertTrue(process, PROCESS_IS_VALID) + self.assertEqual(process.GetNumThreads(), 1) + self.assertEqual(process.GetProcessID(), 0) + + # test memory reading + self.expect("expr -- *(int *) &hz", + substrs=["(int) $0 = %d" % hz_value]) + + main_mod = target.GetModuleAtIndex(0) + hz_addr = (main_mod.FindSymbols("hz")[0].symbol.addr + .GetLoadAddress(target)) + error = lldb.SBError() + self.assertEqual(process.ReadMemory(hz_addr, 4, error), + struct.pack("$uuid"', '', 'echo "DBGDSYMPath$dsym"', @@ -88,30 +236,11 @@ def initial_setup(self): 'exit $ret' ] - with open(self.dsym_for_uuid, "w") as writer: + with open(dsym_for_uuid, "w") as writer: for l in shell_cmds: writer.write(l + '\n') - os.chmod(self.dsym_for_uuid, 0o755) - - self.slide = 0x70000000000 - - ### Create our corefile - # 
0xffffffffffffffff means load address unknown - retcode = call(self.create_corefile + " version-string " + self.verstr_corefile + " " + self.aout_exe + " 0xffffffffffffffff", shell=True) - retcode = call(self.create_corefile + " version-string " + self.verstr_corefile_addr + " " + self.aout_exe + (" 0x%x" % self.slide), shell=True) - retcode = call(self.create_corefile + " main-bin-spec " + self.binspec_corefile + " " + self.aout_exe + " 0xffffffffffffffff", shell=True) - retcode = call(self.create_corefile + " main-bin-spec " + self.binspec_corefile_addr + " " + self.aout_exe + (" 0x%x" % self.slide), shell=True) - - @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") - @skipIf(archs=no_match(['x86_64'])) - @skipUnlessDarwin - def test_lc_note_version_string(self): - self.initial_setup() - - if self.TraceOn(): - self.runCmd("log enable lldb dyld host") - self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) + os.chmod(dsym_for_uuid, 0o755) ### Now run lldb on the corefile ### which will give us a UUID @@ -119,135 +248,35 @@ def test_lc_note_version_string(self): ### which gives us a binary and dSYM ### which lldb should load! - # First, try the "kern ver str" corefile - self.target = self.dbg.CreateTarget('') - err = lldb.SBError() - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.verstr_corefile) - self.process = self.target.LoadCore(self.verstr_corefile) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - # Second, try the "kern ver str" corefile where it loads at an address - self.target = self.dbg.CreateTarget('') - err = lldb.SBError() - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.verstr_corefile_addr) - self.process = self.target.LoadCore(self.verstr_corefile_addr) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - main_sym = self.target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) - main_addr = main_sym.GetStartAddress() - self.assertGreater(main_addr.GetLoadAddress(self.target), self.slide) - self.assertNotEqual(main_addr.GetLoadAddress(self.target), lldb.LLDB_INVALID_ADDRESS) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") - @skipIf(archs=no_match(['x86_64'])) - @skipUnlessDarwin - def test_lc_note_main_bin_spec(self): - self.initial_setup() - - if self.TraceOn(): - self.runCmd("log enable lldb dyld host") - self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) - - # Third, try the "main bin spec" corefile - self.target = self.dbg.CreateTarget('') - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.binspec_corefile) - self.process = self.target.LoadCore(self.binspec_corefile) - 
self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - # Fourth, try the "main bin spec" corefile where it loads at an address - self.target = self.dbg.CreateTarget('') - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.binspec_corefile_addr) - self.process = self.target.LoadCore(self.binspec_corefile_addr) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - main_sym = self.target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) - main_addr = main_sym.GetStartAddress() - self.assertGreater(main_addr.GetLoadAddress(self.target), self.slide) - self.assertNotEqual(main_addr.GetLoadAddress(self.target), lldb.LLDB_INVALID_ADDRESS) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") - @skipIf(archs=no_match(['x86_64'])) - @skipUnlessDarwin - def test_lc_note_main_bin_spec_os_plugin(self): - self.initial_setup() - if self.TraceOn(): self.runCmd("log enable lldb dyld host") self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) # Now load the binary and confirm that we load the OS plugin. 
- self.target = self.dbg.CreateTarget('') + target = self.dbg.CreateTarget('') if self.TraceOn(): - self.runCmd("script print('loading corefile %s with OS plugin')" % self.binspec_corefile_addr) - self.process = self.target.LoadCore(self.binspec_corefile_addr) - self.assertEqual(self.process.IsValid(), True) + self.runCmd("script print('loading corefile %s with OS plugin')" % binspec_corefile_addr) + + process = target.LoadCore(binspec_corefile_addr) + self.assertEqual(process.IsValid(), True) if self.TraceOn(): self.runCmd("image list") self.runCmd("target mod dump sections") self.runCmd("thread list") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) # Verify our OS plug-in threads showed up - thread = self.process.GetThreadByID(0x111111111) + thread = process.GetThreadByID(0x111111111) self.assertTrue(thread.IsValid(), "Make sure there is a thread 0x111111111 after we load the python OS plug-in") - thread = self.process.GetThreadByID(0x222222222) + thread = process.GetThreadByID(0x222222222) self.assertTrue(thread.IsValid(), "Make sure there is a thread 0x222222222 after we load the python OS plug-in") - thread = self.process.GetThreadByID(0x333333333) + thread = process.GetThreadByID(0x333333333) self.assertTrue(thread.IsValid(), "Make sure there is a thread 0x333333333 after we load the python OS plug-in") - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() + self.runCmd("settings clear target.process.python-os-plugin-path") + self.dbg.DeleteTarget(target) diff --git a/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp b/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp index 2845fd453150..8bd6aaabecd6 100644 --- a/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp +++ b/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp @@ -20,9 +20,10 @@ struct main_bin_spec_payload { uint32_t version; uint32_t type; uint64_t address; + uint64_t slide; uuid_t uuid; uint32_t log2_pagesize; - uint32_t unused; + uint32_t platform; }; union uint32_buf { @@ -49,33 +50,36 @@ void add_uint32(std::vector &buf, uint32_t val) { buf.push_back(conv.bytebuf[i]); } -std::vector x86_lc_thread_load_command() { +std::vector lc_thread_load_command(cpu_type_t cputype) { std::vector data; - add_uint32(data, LC_THREAD); // thread_command.cmd - add_uint32(data, 184); // thread_command.cmdsize - add_uint32(data, x86_THREAD_STATE64); // thread_command.flavor - add_uint32(data, x86_THREAD_STATE64_COUNT); // thread_command.count - add_uint64(data, 0x0000000000000000); // rax - add_uint64(data, 0x0000000000000400); // rbx - add_uint64(data, 0x0000000000000000); // rcx - add_uint64(data, 0x0000000000000000); // rdx - add_uint64(data, 0x0000000000000000); // rdi - add_uint64(data, 0x0000000000000000); // rsi - add_uint64(data, 0xffffff9246e2ba20); // rbp - add_uint64(data, 0xffffff9246e2ba10); // rsp - add_uint64(data, 0x0000000000000000); // r8 - add_uint64(data, 0x0000000000000000); // r9 - add_uint64(data, 0x0000000000000000); // r10 - add_uint64(data, 0x0000000000000000); // r11 - add_uint64(data, 0xffffff7f96ce5fe1); // r12 - add_uint64(data, 0x0000000000000000); // r13 - add_uint64(data, 
0x0000000000000000); // r14 - add_uint64(data, 0xffffff9246e2bac0); // r15 - add_uint64(data, 0xffffff8015a8f6d0); // rip - add_uint64(data, 0x0000000000011111); // rflags - add_uint64(data, 0x0000000000022222); // cs - add_uint64(data, 0x0000000000033333); // fs - add_uint64(data, 0x0000000000044444); // gs + // Emit an LC_THREAD register context appropriate for the cputype + // of the binary we're embedded. The tests in this case do not + // use the register values, so 0's are fine, lldb needs to see at + // least one LC_THREAD in the corefile. +#if defined(__x86_64__) + if (cputype == CPU_TYPE_X86_64) { + add_uint32(data, LC_THREAD); // thread_command.cmd + add_uint32(data, + 16 + (x86_THREAD_STATE64_COUNT * 4)); // thread_command.cmdsize + add_uint32(data, x86_THREAD_STATE64); // thread_command.flavor + add_uint32(data, x86_THREAD_STATE64_COUNT); // thread_command.count + for (int i = 0; i < x86_THREAD_STATE64_COUNT; i++) { + add_uint32(data, 0); // whatever, just some empty register values + } + } +#endif +#if defined(__arm64__) || defined(__aarch64__) + if (cputype == CPU_TYPE_ARM64) { + add_uint32(data, LC_THREAD); // thread_command.cmd + add_uint32(data, + 16 + (ARM_THREAD_STATE64_COUNT * 4)); // thread_command.cmdsize + add_uint32(data, ARM_THREAD_STATE64); // thread_command.flavor + add_uint32(data, ARM_THREAD_STATE64_COUNT); // thread_command.count + for (int i = 0; i < ARM_THREAD_STATE64_COUNT; i++) { + add_uint32(data, 0); // whatever, just some empty register values + } + } +#endif return data; } @@ -121,7 +125,8 @@ void add_lc_note_kern_ver_str_load_command( void add_lc_note_main_bin_spec_load_command( std::vector> &loadcmds, std::vector &payload, - int payload_file_offset, std::string uuidstr, uint64_t address) { + int payload_file_offset, std::string uuidstr, uint64_t address, + uint64_t slide) { std::vector loadcmd_data; add_uint32(loadcmd_data, LC_NOTE); // note_command.cmd @@ -145,15 +150,16 @@ void add_lc_note_main_bin_spec_load_command( loadcmds.push_back(loadcmd_data); // Now write the "main bin spec" payload. 
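Concretely, the bytes appended below form a fixed 48-byte record. For reference, a sketch of that layout as a plain struct, mirroring main_bin_spec_payload near the top of this file (the struct below is illustrative and not part of the patch):

#include <cstdint>

// Version 2 of the "main bin spec" LC_NOTE payload as emitted below.
// Version 1 had no slide field and called the final field "unused".
struct MainBinSpecV2 {
  uint32_t version;       // 2
  uint32_t type;          // 3 == firmware / standalone
  uint64_t address;       // load address, 0xffffffffffffffff if unknown
  uint64_t slide;         // new in version 2
  uint8_t uuid[16];
  uint32_t log2_pagesize; // 0 == unspecified
  uint32_t platform;      // 0 == unspecified
};
static_assert(sizeof(MainBinSpecV2) == 48, "payload is 48 bytes");

The add_uint32/add_uint64 calls that emit these fields follow.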
- add_uint32(payload, 1); // version + add_uint32(payload, 2); // version add_uint32(payload, 3); // type == 3 [ firmware, standalone, etc ] add_uint64(payload, address); // load address + add_uint64(payload, slide); // slide uuid_t uuid; uuid_parse(uuidstr.c_str(), uuid); for (int i = 0; i < sizeof(uuid_t); i++) payload.push_back(uuid[i]); add_uint32(payload, 0); // log2_pagesize unspecified - add_uint32(payload, 0); // unused + add_uint32(payload, 0); // platform unspecified } void add_lc_segment(std::vector> &loadcmds, @@ -179,7 +185,8 @@ void add_lc_segment(std::vector> &loadcmds, loadcmds.push_back(loadcmd_data); } -std::string get_uuid_from_binary(const char *fn) { +std::string get_uuid_from_binary(const char *fn, cpu_type_t &cputype, + cpu_subtype_t &cpusubtype) { FILE *f = fopen(fn, "r"); if (f == nullptr) { fprintf(stderr, "Unable to open binary '%s' to get uuid\n", fn); @@ -213,9 +220,9 @@ std::string get_uuid_from_binary(const char *fn) { fprintf(stderr, "error reading mach header from input file\n"); exit(1); } - if (mh.cputype != CPU_TYPE_X86_64) { + if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) { fprintf(stderr, - "This tool creates an x86_64 corefile but " + "This tool creates an x86_64/arm64 corefile but " "the supplied binary '%s' is cputype 0x%x\n", fn, (uint32_t)mh.cputype); exit(1); @@ -223,15 +230,17 @@ std::string get_uuid_from_binary(const char *fn) { num_of_load_cmds = mh.ncmds; size_of_load_cmds = mh.sizeofcmds; file_offset += sizeof(struct mach_header); + cputype = mh.cputype; + cpusubtype = mh.cpusubtype; } else { struct mach_header_64 mh; if (::fread(&mh, 1, sizeof(mh), f) != sizeof(mh)) { fprintf(stderr, "error reading mach header from input file\n"); exit(1); } - if (mh.cputype != CPU_TYPE_X86_64) { + if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) { fprintf(stderr, - "This tool creates an x86_64 corefile but " + "This tool creates an x86_64/arm64 corefile but " "the supplied binary '%s' is cputype 0x%x\n", fn, (uint32_t)mh.cputype); exit(1); @@ -239,6 +248,8 @@ std::string get_uuid_from_binary(const char *fn) { num_of_load_cmds = mh.ncmds; size_of_load_cmds = mh.sizeofcmds; file_offset += sizeof(struct mach_header_64); + cputype = mh.cputype; + cpusubtype = mh.cpusubtype; } off_t load_cmds_offset = file_offset; @@ -269,12 +280,15 @@ std::string get_uuid_from_binary(const char *fn) { } int main(int argc, char **argv) { - if (argc != 5) { - fprintf(stderr, - "usage: create-empty-corefile version-string|main-bin-spec " - "
\n"); + if (argc != 6) { + fprintf( + stderr, + "usage: create-empty-corefile version-string|main-bin-spec " + "
\n"); fprintf(stderr, "
is base 16, 0xffffffffffffffff means unknown\n"); + fprintf(stderr, + " is base 16, 0xffffffffffffffff means unknown\n"); fprintf( stderr, "Create a Mach-O corefile with an either LC_NOTE 'kern ver str' or \n"); @@ -289,7 +303,9 @@ int main(int argc, char **argv) { exit(1); } - std::string uuid = get_uuid_from_binary(argv[3]); + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + std::string uuid = get_uuid_from_binary(argv[3], cputype, cpusubtype); // An array of load commands (in the form of byte arrays) std::vector> load_commands; @@ -304,15 +320,22 @@ int main(int argc, char **argv) { exit(1); } + errno = 0; + uint64_t slide = strtoull(argv[5], NULL, 16); + if (errno != 0) { + fprintf(stderr, "Unable to parse slide %s as base 16", argv[4]); + exit(1); + } + // First add all the load commands / payload so we can figure out how large // the load commands will actually be. - load_commands.push_back(x86_lc_thread_load_command()); + load_commands.push_back(lc_thread_load_command(cputype)); if (strcmp(argv[1], "version-string") == 0) add_lc_note_kern_ver_str_load_command(load_commands, payload, 0, uuid, address); else add_lc_note_main_bin_spec_load_command(load_commands, payload, 0, uuid, - address); + address, slide); add_lc_segment(load_commands, payload, 0); int size_of_load_commands = 0; @@ -327,22 +350,22 @@ int main(int argc, char **argv) { load_commands.clear(); payload.clear(); - load_commands.push_back(x86_lc_thread_load_command()); + load_commands.push_back(lc_thread_load_command(cputype)); if (strcmp(argv[1], "version-string") == 0) add_lc_note_kern_ver_str_load_command( load_commands, payload, header_and_load_cmd_room, uuid, address); else add_lc_note_main_bin_spec_load_command( - load_commands, payload, header_and_load_cmd_room, uuid, address); + load_commands, payload, header_and_load_cmd_room, uuid, address, slide); add_lc_segment(load_commands, payload, header_and_load_cmd_room); struct mach_header_64 mh; mh.magic = MH_MAGIC_64; - mh.cputype = CPU_TYPE_X86_64; + mh.cputype = cputype; - mh.cpusubtype = CPU_SUBTYPE_X86_64_ALL; + mh.cpusubtype = cpusubtype; mh.filetype = MH_CORE; mh.ncmds = load_commands.size(); mh.sizeofcmds = size_of_load_commands; diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp index 5b1154d2cd8b..e78a084f59f0 100644 --- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp +++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp @@ -53,6 +53,8 @@ TEST_F(TestTypeSystemClang, TestGetBasicTypeFromEnum) { context.hasSameType(GetBasicQualType(eBasicTypeBool), context.BoolTy)); EXPECT_TRUE( context.hasSameType(GetBasicQualType(eBasicTypeChar), context.CharTy)); + EXPECT_TRUE(context.hasSameType(GetBasicQualType(eBasicTypeChar8), + context.Char8Ty)); EXPECT_TRUE(context.hasSameType(GetBasicQualType(eBasicTypeChar16), context.Char16Ty)); EXPECT_TRUE(context.hasSameType(GetBasicQualType(eBasicTypeChar32), diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 1d97626d69cc..327b8e0ba2e7 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -254,14 +254,6 @@ function(add_link_opts target_name) # Don't use linker optimizations in debug builds since it slows down the # linker in a context where the optimizations are not important. if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - - # Pass -O3 to the linker. This enabled different optimizations on different - # linkers. 
- if(NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin|SunOS|AIX|OS390" OR WIN32) AND in_distribution) - set_property(TARGET ${target_name} APPEND_STRING PROPERTY - LINK_FLAGS " -Wl,-O3") - endif() - if(NOT LLVM_NO_DEAD_STRIP) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # ld64's implementation of -dead_strip breaks tools that use plugins. diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake index 6c79e2f49784..9427a26de8be 100644 --- a/llvm/cmake/modules/TensorFlowCompile.cmake +++ b/llvm/cmake/modules/TensorFlowCompile.cmake @@ -27,15 +27,13 @@ function(tf_get_model model final_path) endfunction() # Generate a mock model for tests. -function(generate_mock_model model generate_mock_model_py config) - tf_get_absolute_path(${model} ${CMAKE_CURRENT_BINARY_DIR} LLVM_ML_MODELS_ABSOLUTE) - tf_get_absolute_path(${generate_mock_model_py} ${CMAKE_CURRENT_SOURCE_DIR} GENERATED_MODEL_ABSOLUTE_PATH) - tf_get_absolute_path(${config} ${CMAKE_CURRENT_SOURCE_DIR} LLVM_ML_MODEL_CONFIG_ABSOLUTE) +function(generate_mock_model generator output) + tf_get_absolute_path(${generator} ${CMAKE_CURRENT_SOURCE_DIR} generator_absolute_path) + tf_get_absolute_path(${output} ${CMAKE_CURRENT_BINARY_DIR} output_absolute_path) message(WARNING "Autogenerated mock models should not be used in production builds.") execute_process(COMMAND python3 - ${GENERATED_MODEL_ABSOLUTE_PATH} - ${LLVM_ML_MODEL_CONFIG_ABSOLUTE} - ${LLVM_ML_MODELS_ABSOLUTE}-autogenerated + ${generator_absolute_path} + ${output_absolute_path} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) endfunction() @@ -86,19 +84,19 @@ function(tfcompile model tag_set signature_def_key fname cpp_class) endfunction() -function(tf_find_and_compile model default_url default_path generation_config tag_set signature_def_key fname cpp_class) +function(tf_find_and_compile model default_url default_path test_model_generator tag_set signature_def_key fname cpp_class) if ("${model}" STREQUAL "download") # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED" - if ("${LLVM_INLINER_MODEL_CURRENT_URL}" STREQUAL "TO_BE_UPDATED") - message(FATAL_ERROR "LLVM_INLINER_MODEL_PATH was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.") + if ("${default_url}" STREQUAL "TO_BE_UPDATED") + message(FATAL_ERROR "Default URL was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.") endif() set(model ${default_url}) endif() if ("${model}" STREQUAL "autogenerate") - generate_mock_model(${default_path} models/generate_mock_model.py ${generation_config}) - set(model ${default_path}-autogenerated) + set(model ${default_path}-autogenerated) + generate_mock_model(${test_model_generator} ${model}) endif() tf_get_model(${model} LLVM_ML_MODELS_ABSOLUTE) diff --git a/llvm/docs/CompilerWriterInfo.rst b/llvm/docs/CompilerWriterInfo.rst index 9184b852539d..0b02b40551a6 100644 --- a/llvm/docs/CompilerWriterInfo.rst +++ b/llvm/docs/CompilerWriterInfo.rst @@ -119,6 +119,12 @@ SystemZ * `z/Architecture Principles of Operation (registration required, free sign-up) `_ +VE +-- + +* `NEC SX-Aurora TSUBASA ISA Guide `_ +* `NEC SX-Aurora TSUBASA manuals and documentation `_ + X86 --- diff --git a/llvm/docs/HowToAddABuilder.rst b/llvm/docs/HowToAddABuilder.rst index 24837f7c580a..5bb82489262e 100644 --- 
a/llvm/docs/HowToAddABuilder.rst +++ b/llvm/docs/HowToAddABuilder.rst @@ -80,7 +80,7 @@ Here are the steps you can follow to do so: to authenticate your buildbot-worker. #. Create a buildbot-worker in context of that buildbot-worker account. Point it - to the **lab.llvm.org** port **9990** (see `Buildbot documentation, + to the **lab.llvm.org** port **9994** (see `Buildbot documentation, Creating a worker `_ for more details) by running the following command: @@ -88,12 +88,14 @@ Here are the steps you can follow to do so: .. code-block:: bash $ buildbot-worker create-worker \ - lab.llvm.org:9990 \ + lab.llvm.org:9994 \ \ - To point a worker to silent master please use lab.llvm.org:9994 instead - of lab.llvm.org:9990. + This will cause your new worker to connect to the staging buildmaster + which is silent by default. Only once a new worker is stable, and + approval from Galina has been received (see last step) should it + be pointed at the main buildmaster. #. Fill the buildbot-worker description and admin name/e-mail. Here is an example of the buildbot-worker description:: @@ -140,13 +142,25 @@ Here are the steps you can follow to do so: will let you know that your changes are applied and buildmaster is reconfigured. -#. Check the status of your buildbot-worker on the `Waterfall Display - `_ to make sure it is connected, - and the `Workers Display `_ to see if - administrator contact and worker information are correct. - -#. Wait for the first build to succeed and enjoy. - +#. Check the status of your buildbot-worker on the `Waterfall Display (Staging) + `_ to make sure it is + connected, and the `Workers Display (Staging) + `_ to see if administrator + contact and worker information are correct. + +#. At this point, you have a working builder connected to the staging + buildmaster. You can now make sure it is reliably green and keeps + up with the build queue. No notifications will be sent, so you can + keep an unstable builder connected to staging indefinitely. + +#. (Optional) Once the builder is stable on the staging buildmaster with + several days of green history, you can chose to move it to the production + buildmaster to enable developer notifications. Please email `Galina + Kistanova `_ for review and approval. + + To move a worker to production (once approved), stop your worker, edit the + buildbot.tac file to change the port number from 9994 to 9990 and start it + again. 
Best Practices for Configuring a Fast Builder ============================================= diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c b/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c index 93d36a40b12f..c557fc7795e5 100644 --- a/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c @@ -159,7 +159,6 @@ void Materialize(void *Ctx, LLVMOrcMaterializationResponsibilityRef MR) { LLVMOrcIRTransformLayerRef IRLayer = LLVMOrcLLJITGetIRTransformLayer(J); LLVMOrcIRTransformLayerEmit(IRLayer, MR, TSM); } - return; } int main(int argc, char *argv[]) { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c7572dd674c8..d9f5c9689d5c 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1298,13 +1298,12 @@ class TargetTransformInfo { bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - /// \returns True if the caller and callee agree on how \p Args will be passed + /// \returns True if the caller and callee agree on how \p Types will be + /// passed to or returned from the callee. /// to the callee. - /// \param[out] Args The list of compatible arguments. The implementation may - /// filter out any incompatible args from this list. - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const; + /// \param Types List of types to check. + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const; /// The type of load/store indexing. 
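The TargetTransformInfo hunks here replace areFunctionArgsABICompatible with areTypesABICompatible, which takes the list of types to check instead of a mutable argument set. A rough usage sketch of the new hook follows; the surrounding helper is hypothetical and only the TTI call reflects the interface in the patch:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/InstrTypes.h"

  using namespace llvm;

  // Hypothetical helper: check that every argument type at a call site is
  // passed compatibly between the calling function and the callee.
  static bool callArgsABICompatible(const TargetTransformInfo &TTI,
                                    const CallBase &CB,
                                    const Function &Callee) {
    SmallVector<Type *, 8> Types;
    for (Value *Arg : CB.args())
      Types.push_back(Arg->getType());
    // New interface: query compatibility of the types directly.
    return TTI.areTypesABICompatible(CB.getCaller(), &Callee, Types);
  }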
enum MemIndexedMode { @@ -1735,9 +1734,9 @@ class TargetTransformInfo::Concept { unsigned SrcAlign, unsigned DestAlign) const = 0; virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const = 0; - virtual bool - areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const = 0; + virtual bool areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef &Types) const = 0; virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; @@ -2299,10 +2298,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { const Function *Callee) const override { return Impl.areInlineCompatible(Caller, Callee); } - bool areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const override { - return Impl.areFunctionArgsABICompatible(Caller, Callee, Args); + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const override { + return Impl.areTypesABICompatible(Caller, Callee, Types); } bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 1bcfe24e30d7..26a696f09b3d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -708,9 +708,8 @@ class TargetTransformInfoImplBase { Callee->getFnAttribute("target-features")); } - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const { + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const { return (Caller->getFnAttribute("target-cpu") == Callee->getFnAttribute("target-cpu")) && (Caller->getFnAttribute("target-features") == diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 74615c73741a..044f2e22cfdd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -192,6 +192,10 @@ class LegalizerHelper { SmallVectorImpl &VRegs, SmallVectorImpl &LeftoverVRegs); + /// Version which handles irregular sub-vector splits. + void extractVectorParts(Register Reg, unsigned NumElst, + SmallVectorImpl &VRegs); + /// Helper function to build a wide generic register \p DstReg of type \p /// RegTy from smaller parts. This will produce a G_MERGE_VALUES, /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate @@ -205,6 +209,11 @@ class LegalizerHelper { LLT PartTy, ArrayRef PartRegs, LLT LeftoverTy = LLT(), ArrayRef LeftoverRegs = {}); + /// Merge \p PartRegs with different types into \p DstReg. + void mergeMixedSubvectors(Register DstReg, ArrayRef PartRegs); + + void appendVectorElts(SmallVectorImpl &Elts, Register Reg); + /// Unmerge \p SrcReg into smaller sized values, and append them to \p /// Parts. The elements of \p Parts will be the greatest common divisor type /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and @@ -285,26 +294,18 @@ class LegalizerHelper { /// vector bounds. 
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index); - LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI, - unsigned TypeIdx, LLT NarrowTy); - - /// Legalize a instruction with a vector type where each operand may have a - /// different element type. All type indexes must have the same number of - /// elements. - LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI, - unsigned TypeIdx, LLT NarrowTy); + /// Handles most opcodes. Split \p MI into same instruction on sub-vectors or + /// scalars with \p NumElts elements (1 for scalar). Supports uneven splits: + /// there can be leftover sub-vector with fewer then \p NumElts or a leftover + /// scalar. To avoid this use moreElements first and set MI number of elements + /// to multiple of \p NumElts. Non-vector operands that should be used on all + /// sub-instructions without split are listed in \p NonVecOpIndices. + LegalizeResult fewerElementsVectorMultiEltType( + GenericMachineInstr &MI, unsigned NumElts, + std::initializer_list NonVecOpIndices = {}); - LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - - LegalizeResult - fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); - - LegalizeResult - fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); - - LegalizeResult fewerElementsVectorPhi(MachineInstr &MI, - unsigned TypeIdx, LLT NarrowTy); + LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, + unsigned NumElts); LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); @@ -320,22 +321,9 @@ class LegalizerHelper { unsigned TypeIdx, LLT NarrowTy); - LegalizeResult fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy); - /// Legalize an instruction by reducing the operation width, either by - /// narrowing the type of the operation or by reducing the number of elements - /// of a vector. - /// The used strategy (narrow vs. fewerElements) is decided by \p NarrowTy. - /// Narrow is used if the scalar type of \p NarrowTy and \p DstTy differ, - /// fewerElements is used when the scalar type is the same but the number of - /// elements between \p NarrowTy and \p DstTy differ. - LegalizeResult reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 68c14240ebc7..0b37539030b1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -58,7 +58,10 @@ enum LegalizeAction : std::uint8_t { /// The (vector) operation should be implemented by splitting it into /// sub-vectors where the operation is legal. For example a <8 x s64> add - /// might be implemented as 4 separate <2 x s64> adds. + /// might be implemented as 4 separate <2 x s64> adds. There can be a leftover + /// if there are not enough elements for last sub-vector e.g. <7 x s64> add + /// will be implemented as 3 separate <2 x s64> adds and one s64 add. Leftover + /// types can be avoided by doing MoreElements first. 
FewerElements, /// The (vector) operation should be implemented by widening the input @@ -1050,6 +1053,26 @@ class LegalizeRuleSet { TypeIdx, LLT::fixed_vector(MinElements, VecTy.getElementType())); }); } + + /// Set number of elements to nearest larger multiple of NumElts. + LegalizeRuleSet &alignNumElementsTo(unsigned TypeIdx, const LLT EltTy, + unsigned NumElts) { + typeIdx(TypeIdx); + return actionIf( + LegalizeAction::MoreElements, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return VecTy.isVector() && VecTy.getElementType() == EltTy && + (VecTy.getNumElements() % NumElts != 0); + }, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + unsigned NewSize = alignTo(VecTy.getNumElements(), NumElts); + return std::make_pair( + TypeIdx, LLT::fixed_vector(NewSize, VecTy.getElementType())); + }); + } + /// Limit the number of elements in EltTy vectors to at most MaxElements. LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements) { @@ -1085,6 +1108,19 @@ class LegalizeRuleSet { .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements()); } + /// Express \p EltTy vectors strictly using vectors with \p NumElts elements + /// (or scalars when \p NumElts equals 1). + /// First pad with undef elements to nearest larger multiple of \p NumElts. + /// Then perform split with all sub-instructions having the same type. + /// Using clampMaxNumElements (non-strict) can result in leftover instruction + /// with different type (fewer elements then \p NumElts or scalar). + /// No effect if the type is not a vector. + LegalizeRuleSet &clampMaxNumElementsStrict(unsigned TypeIdx, const LLT EltTy, + unsigned NumElts) { + return alignNumElementsTo(TypeIdx, EltTy, NumElts) + .clampMaxNumElements(TypeIdx, EltTy, NumElts); + } + /// Fallback on the previous implementation. This should only be used while /// porting a rule. LegalizeRuleSet &fallback() { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 069f71b54328..fde0cb3cf1af 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -497,6 +497,34 @@ class MachineIRBuilder { MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits); + /// Build and insert + /// a, b, ..., x = G_UNMERGE_VALUES \p Op0 + /// \p Res = G_BUILD_VECTOR a, b, ..., x, undef, ..., undef + /// + /// Pad \p Op0 with undef elements to match number of elements in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res and \p Op0 must be generic virtual registers with vector type, + /// same vector element type and Op0 must have fewer elements then Res. + /// + /// \return a MachineInstrBuilder for the newly created build vector instr. + MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, + const SrcOp &Op0); + + /// Build and insert + /// a, b, ..., x, y, z = G_UNMERGE_VALUES \p Op0 + /// \p Res = G_BUILD_VECTOR a, b, ..., x + /// + /// Delete trailing elements in \p Op0 to match number of elements in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res and \p Op0 must be generic virtual registers with vector type, + /// same vector element type and Op0 must have more elements then Res. + /// + /// \return a MachineInstrBuilder for the newly created build vector instr. 
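The clampMaxNumElementsStrict rule documented above chains alignNumElementsTo with clampMaxNumElements so that splitting never produces a differently typed leftover instruction. A hedged sketch of how a target might use it; this is a fragment of a hypothetical MyTargetLegalizerInfo constructor, and the opcode and vector widths are placeholders, not taken from the patch:

  // Inside a hypothetical target's LegalizerInfo constructor, with the usual
  // GlobalISel includes and `using namespace llvm::TargetOpcode;` in scope.
  const LLT S32 = LLT::scalar(32);
  const LLT V4S32 = LLT::fixed_vector(4, 32);

  getActionDefinitionsBuilder(G_AND)
      .legalFor({V4S32})
      // alignNumElementsTo first pads e.g. <7 x s32> up to <8 x s32> with
      // undef lanes, then clampMaxNumElements splits into uniform <4 x s32>
      // pieces, so no narrower leftover instruction is produced.
      .clampMaxNumElementsStrict(/*TypeIdx=*/0, S32, /*NumElts=*/4);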
+ MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, + const SrcOp &Op0); + /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1 /// /// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 4126e2ac7b8f..8fed79585fe9 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -323,6 +323,11 @@ Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy); +LLVM_READNONE +/// Return smallest type that covers both \p OrigTy and \p TargetTy and is +/// multiple of TargetTy. +LLT getCoverTy(LLT OrigTy, LLT TargetTy); + /// Return a type where the total size is the greatest common divisor of \p /// OrigTy and \p TargetTy. This will try to either change the number of vector /// elements, or bitwidth of scalars. The intent is the result type can be used diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 83ce3d017cc5..b2d82e0cc6e8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3620,6 +3620,13 @@ class TargetLowering : public TargetLoweringBase { const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const; + + /// Return true if vector \p Op has the same value across all \p DemandedElts, + /// indicating any elements which may be undef in the output \p UndefElts. + virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth = 0) const; + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. CombineLevel Level; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 5ee379b7fcad..18d577dff497 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -180,6 +180,7 @@ def OMPC_Read : Clause<"read"> { let clangClass = "OMPReadClause"; } def OMPC_Write : Clause<"write"> { let clangClass = "OMPWriteClause"; } def OMPC_Update : Clause<"update"> { let clangClass = "OMPUpdateClause"; } def OMPC_Capture : Clause<"capture"> { let clangClass = "OMPCaptureClause"; } +def OMPC_Compare : Clause<"compare"> { let clangClass = "OMPCompareClause"; } def OMPC_SeqCst : Clause<"seq_cst"> { let clangClass = "OMPSeqCstClause"; } def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; } def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; } @@ -282,7 +283,7 @@ def OMPC_Allocate : Clause<"allocate"> { def OMPC_NonTemporal : Clause<"nontemporal"> { let clangClass = "OMPNontemporalClause"; let flangClass = "Name"; - let isValueList = true; + let isValueList = true; } def OMP_ORDER_concurrent : ClauseVal<"concurrent",1,1> {} @@ -536,6 +537,7 @@ def OMP_Atomic : Directive<"atomic"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause ]; let allowedOnceClauses = [ VersionedClause, diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 046e9b5e809e..84ebb461ebef 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -105,6 +105,11 @@ class AllocaInst : public UnaryInstruction { return cast(Instruction::getType()); } + /// Return the address space for the allocation. 
+ unsigned getAddressSpace() const { + return getType()->getAddressSpace(); + } + /// Get allocation size in bits. Returns None if size can't be determined, /// e.g. in case of a VLA. Optional getAllocationSizeInBits(const DataLayout &DL) const; @@ -1451,6 +1456,10 @@ class FCmpInst: public CmpInst { /// static auto predicates() { return FCmpPredicates(); } + /// Return result of `LHS Pred RHS` comparison. + static bool compare(const APFloat &LHS, const APFloat &RHS, + FCmpInst::Predicate Pred); + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 138770030ebb..da580de3dbd3 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1017,14 +1017,15 @@ def int_codeview_annotation : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], //===------------------------ Trampoline Intrinsics -----------------------===// // -def int_init_trampoline : Intrinsic<[], - [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture>]>, - GCCBuiltin<"__builtin_init_trampoline">; - -def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], - [IntrReadMem, IntrArgMemOnly]>, - GCCBuiltin<"__builtin_adjust_trampoline">; +def int_init_trampoline : DefaultAttrsIntrinsic< + [], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + [IntrArgMemOnly, NoCapture>, WriteOnly>, + ReadNone>, ReadNone>]>, + GCCBuiltin<"__builtin_init_trampoline">; + +def int_adjust_trampoline : DefaultAttrsIntrinsic< + [llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>, + GCCBuiltin<"__builtin_adjust_trampoline">; //===------------------------ Overflow Intrinsics -------------------------===// // diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td index 6799273bf805..177114636a50 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td @@ -23,13 +23,6 @@ class Hexagon_i64_i64_Intrinsic; // tag : A2_add -class Hexagon_custom_i32_i32i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty], - intr_properties>; - -// tag : A2_addh_h16_hh class Hexagon_i32_i32i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; // tag : A2_addp -class Hexagon_custom_i64_i64i64_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty], - intr_properties>; - -// tag : A2_addpsat class Hexagon_i64_i64i64_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; -// tag : A2_neg -class Hexagon_custom_i32_i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i32_ty], [llvm_i32_ty], - intr_properties>; - // tag : A2_roundsat class Hexagon_i32_i64_Intrinsic intr_properties = [IntrNoMem]> @@ -288,20 +267,6 @@ class Hexagon_i64_i64i32i32_Intrinsic; -// tag : M2_dpmpyss_s0 -class Hexagon_custom_i64_i32i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i64_ty], [llvm_i32_ty,llvm_i32_ty], - intr_properties>; - -// tag : S2_asl_i_p -class Hexagon_custom_i64_i64i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty], - intr_properties>; - // tag : S2_insert class Hexagon_i32_i32i32i32i32_Intrinsic intr_properties = [IntrNoMem]> @@ -351,7 
+316,7 @@ class Hexagon_v32i32_v64i32_Intrinsic; -// tag : V6_lvsplatb +// tag : V6_lvsplatw class Hexagon_v16i32_i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; // tag : V6_pred_and -class Hexagon_custom_v64i1_v64i1v64i1_Intrinsic< +class Hexagon_v64i1_v64i1v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_and -class Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B< +class Hexagon_v128i1_v128i1v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_not -class Hexagon_custom_v64i1_v64i1_Intrinsic< +class Hexagon_v64i1_v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_not -class Hexagon_custom_v128i1_v128i1_Intrinsic_128B< +class Hexagon_v128i1_v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_scalar2 -class Hexagon_custom_v64i1_i32_Intrinsic< +class Hexagon_v64i1_i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_scalar2 -class Hexagon_custom_v128i1_i32_Intrinsic_128B< +class Hexagon_v128i1_i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -436,27 +401,27 @@ class Hexagon_v64i32_v64i32v64i32v64i32i32_Intrinsic; // tag : V6_vS32b_nqpred_ai -class Hexagon_custom__v64i1ptrv16i32_Intrinsic< +class Hexagon__v64i1ptrv16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vS32b_nqpred_ai -class Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B< +class Hexagon__v128i1ptrv32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; -// tag : V6_vabsb +// tag : V6_vabs_hf class Hexagon_v16i32_v16i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; -// tag : V6_vabsb +// tag : V6_vabs_hf class Hexagon_v32i32_v32i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; +// tag : V6_vadd_sf_hf +class Hexagon_v32i32_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vadd_sf_hf +class Hexagon_v64i32_v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vaddb_dv class Hexagon_v64i32_v64i32v64i32_Intrinsic intr_properties = [IntrNoMem]> @@ -485,16 +464,16 @@ class Hexagon_v64i32_v64i32v64i32_Intrinsic; // tag : V6_vaddbnq -class Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic< +class Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vaddbnq -class Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B< +class Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -513,31 +492,17 @@ class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B< intr_properties>; // tag : V6_vaddcarrysat -class Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic< +class Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vaddcarrysat -class Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty], - intr_properties>; - -// tag : V6_vaddhw -class 
Hexagon_v32i32_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - -// tag : V6_vaddhw -class Hexagon_v64i32_v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; // tag : V6_vaddhw_acc @@ -562,72 +527,72 @@ class Hexagon_v16i32_v16i32v16i32i32_Intrinsic; // tag : V6_vandnqrt -class Hexagon_custom_v16i32_v64i1i32_Intrinsic< +class Hexagon_v16i32_v64i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandnqrt -class Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B< +class Hexagon_v32i32_v128i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandnqrt_acc -class Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic< +class Hexagon_v16i32_v16i32v64i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandnqrt_acc -class Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B< +class Hexagon_v32i32_v32i32v128i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvnqv -class Hexagon_custom_v16i32_v64i1v16i32_Intrinsic< +class Hexagon_v16i32_v64i1v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvnqv -class Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B< +class Hexagon_v32i32_v128i1v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt -class Hexagon_custom_v64i1_v16i32i32_Intrinsic< +class Hexagon_v64i1_v16i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt -class Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B< +class Hexagon_v128i1_v32i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt_acc -class Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic< +class Hexagon_v64i1_v64i1v16i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt_acc -class Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B< +class Hexagon_v128i1_v128i1v32i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -645,6 +610,20 @@ class Hexagon_v32i32_v32i32i32_Intrinsic; +// tag : V6_vasrvuhubrndsat +class Hexagon_v16i32_v32i32v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vasrvuhubrndsat +class Hexagon_v32i32_v64i32v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vassignp class Hexagon_v64i32_v64i32_Intrinsic intr_properties = [IntrNoMem]> @@ -652,6 +631,20 @@ class Hexagon_v64i32_v64i32_Intrinsic; +// tag : V6_vcvt_hf_b +class Hexagon_v32i32_v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vcvt_hf_b +class Hexagon_v64i32_v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vd0 class Hexagon_v16i32__Intrinsic intr_properties = [IntrNoMem]> @@ -687,6 +680,20 @@ class Hexagon_v64i32_v32i32v32i32i32_Intrinsic; +// tag : V6_vdmpy_sf_hf_acc +class Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vdmpy_sf_hf_acc +class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vdmpybus_dv class Hexagon_v64i32_v64i32i32_Intrinsic 
intr_properties = [IntrNoMem]> @@ -722,45 +729,31 @@ class Hexagon_v32i32_v32i32v64i32i32_Intrinsic; -// tag : V6_vdmpyhvsat_acc -class Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - -// tag : V6_vdmpyhvsat_acc -class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - // tag : V6_veqb -class Hexagon_custom_v64i1_v16i32v16i32_Intrinsic< +class Hexagon_v64i1_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_veqb -class Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B< +class Hexagon_v128i1_v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_veqb_and -class Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic< +class Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_veqb_and -class Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B< +class Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -779,16 +772,16 @@ class Hexagon__ptri32i32v32i32_Intrinsic; // tag : V6_vgathermhq -class Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic< +class Hexagon__ptrv64i1i32i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vgathermhq -class Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B< +class Hexagon__ptrv128i1i32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -800,16 +793,16 @@ class Hexagon__ptri32i32v64i32_Intrinsic; // tag : V6_vgathermhwq -class Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic< +class Hexagon__ptrv64i1i32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vgathermhwq -class Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B< +class Hexagon__ptrv128i1i32i32v64i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -891,16 +884,16 @@ class Hexagon_v64i32_v64i32v32i32i32_Intrinsic; // tag : V6_vprefixqb -class Hexagon_custom_v16i32_v64i1_Intrinsic< +class Hexagon_v16i32_v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vprefixqb -class Hexagon_custom_v32i32_v128i1_Intrinsic_128B< +class Hexagon_v32i32_v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -932,20 +925,6 @@ class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic; -// tag : V6_vsb -class Hexagon_v32i32_v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - -// tag : V6_vsb -class Hexagon_v64i32_v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - // tag : V6_vscattermh class Hexagon__i32i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> @@ -961,16 +940,16 @@ class Hexagon__i32i32v32i32v32i32_Intrinsic; // tag : V6_vscattermhq -class Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic< +class Hexagon__v64i1i32i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vscattermhq -class Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B< +class Hexagon__v128i1i32i32v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -989,30 
+968,30 @@ class Hexagon__i32i32v64i32v32i32_Intrinsic; // tag : V6_vscattermhwq -class Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic< +class Hexagon__v64i1i32i32v32i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vscattermhwq -class Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B< +class Hexagon__v128i1i32i32v64i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vswap -class Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic< +class Hexagon_v32i32_v64i1v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vswap -class Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B< +class Hexagon_v64i32_v128i1v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -1077,7 +1056,7 @@ def int_hexagon_A2_abssat : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_abssat">; def int_hexagon_A2_add : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_add">; def int_hexagon_A2_addh_h16_hh : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_hh">; @@ -1116,10 +1095,10 @@ def int_hexagon_A2_addh_l16_sat_ll : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">; def int_hexagon_A2_addi : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_addp : -Hexagon_custom_i64_i64i64_Intrinsic; +Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addp">; def int_hexagon_A2_addpsat : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addpsat">; @@ -1131,10 +1110,10 @@ def int_hexagon_A2_addsp : Hexagon_i64_i32i64_Intrinsic<"HEXAGON_A2_addsp">; def int_hexagon_A2_and : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_and">; def int_hexagon_A2_andir : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_andp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">; @@ -1188,7 +1167,7 @@ def int_hexagon_A2_minup : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minup">; def int_hexagon_A2_neg : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_neg">; def int_hexagon_A2_negp : Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_negp">; @@ -1197,16 +1176,16 @@ def int_hexagon_A2_negsat : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_negsat">; def int_hexagon_A2_not : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_not">; def int_hexagon_A2_notp : Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_notp">; def int_hexagon_A2_or : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_or">; def int_hexagon_A2_orir : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_orp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_orp">; @@ -1230,7 +1209,7 @@ def int_hexagon_A2_satuh : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satuh">; def int_hexagon_A2_sub : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_sub">; def int_hexagon_A2_subh_h16_hh : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_hh">; @@ -1269,10 +1248,10 @@ def int_hexagon_A2_subh_l16_sat_ll : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">; def int_hexagon_A2_subp : -Hexagon_custom_i64_i64i64_Intrinsic; 
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_subp">; def int_hexagon_A2_subri : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_subsat : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subsat">; @@ -1308,10 +1287,10 @@ def int_hexagon_A2_swiz : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_swiz">; def int_hexagon_A2_sxtb : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxtb">; def int_hexagon_A2_sxth : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxth">; def int_hexagon_A2_sxtw : Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_sxtw">; @@ -1524,16 +1503,16 @@ def int_hexagon_A2_vsubws : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubws">; def int_hexagon_A2_xor : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_xor">; def int_hexagon_A2_xorp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_xorp">; def int_hexagon_A2_zxtb : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxtb">; def int_hexagon_A2_zxth : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxth">; def int_hexagon_A4_andn : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_andn">; @@ -2088,7 +2067,7 @@ def int_hexagon_M2_dpmpyss_rnd_s0 : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">; def int_hexagon_M2_dpmpyss_s0 : -Hexagon_custom_i64_i32i32_Intrinsic; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_s0">; def int_hexagon_M2_dpmpyuu_acc_s0 : Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">; @@ -2097,7 +2076,7 @@ def int_hexagon_M2_dpmpyuu_nac_s0 : Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">; def int_hexagon_M2_dpmpyuu_s0 : -Hexagon_custom_i64_i32i32_Intrinsic; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">; def int_hexagon_M2_hmmpyh_rs1 : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">; @@ -2514,10 +2493,10 @@ def int_hexagon_M2_mpyd_rnd_ll_s1 : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">; def int_hexagon_M2_mpyi : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyi">; def int_hexagon_M2_mpysmi : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_mpysu_up : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysu_up">; @@ -2670,7 +2649,7 @@ def int_hexagon_M2_mpyud_nac_ll_s1 : Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">; def int_hexagon_M2_mpyui : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyui">; def int_hexagon_M2_nacci : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_nacci">; @@ -2958,7 +2937,7 @@ def int_hexagon_S2_addasl_rrri : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p : -Hexagon_custom_i64_i64i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_acc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc", [IntrNoMem, ImmArg>]>; @@ -2976,7 +2955,7 @@ def int_hexagon_S2_asl_i_p_xacc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_acc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc", [IntrNoMem, ImmArg>]>; 
@@ -3045,7 +3024,7 @@ def int_hexagon_S2_asl_r_vw : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vw">; def int_hexagon_S2_asr_i_p : -Hexagon_custom_i64_i64i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_acc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc", [IntrNoMem, ImmArg>]>; @@ -3066,7 +3045,7 @@ def int_hexagon_S2_asr_i_p_rnd_goodsyntax : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_acc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc", [IntrNoMem, ImmArg>]>; @@ -3258,7 +3237,7 @@ def int_hexagon_S2_lsl_r_vw : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vw">; def int_hexagon_S2_lsr_i_p : -Hexagon_custom_i64_i64i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_acc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc", [IntrNoMem, ImmArg>]>; @@ -3276,7 +3255,7 @@ def int_hexagon_S2_lsr_i_p_xacc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_acc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc", [IntrNoMem, ImmArg>]>; @@ -3809,70 +3788,70 @@ def int_hexagon_V6_lvsplatw_128B : Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplatw_128B">; def int_hexagon_V6_pred_and : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_and">; def int_hexagon_V6_pred_and_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_and_128B">; def int_hexagon_V6_pred_and_n : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_and_n">; def int_hexagon_V6_pred_and_n_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_and_n_128B">; def int_hexagon_V6_pred_not : -Hexagon_custom_v64i1_v64i1_Intrinsic; +Hexagon_v64i1_v64i1_Intrinsic<"HEXAGON_V6_pred_not">; def int_hexagon_V6_pred_not_128B : -Hexagon_custom_v128i1_v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1_Intrinsic<"HEXAGON_V6_pred_not_128B">; def int_hexagon_V6_pred_or : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_or">; def int_hexagon_V6_pred_or_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_or_128B">; def int_hexagon_V6_pred_or_n : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_or_n">; def int_hexagon_V6_pred_or_n_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_or_n_128B">; def int_hexagon_V6_pred_scalar2 : -Hexagon_custom_v64i1_i32_Intrinsic; +Hexagon_v64i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2">; def int_hexagon_V6_pred_scalar2_128B : -Hexagon_custom_v128i1_i32_Intrinsic_128B; +Hexagon_v128i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">; def int_hexagon_V6_pred_xor : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; 
+Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_xor">; def int_hexagon_V6_pred_xor_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_xor_128B">; def int_hexagon_V6_vS32b_nqpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nqpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_nqpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_nqpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_qpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_qpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vS32b_qpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_qpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vabsdiffh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffh">; @@ -3935,16 +3914,16 @@ def int_hexagon_V6_vaddb_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">; def int_hexagon_V6_vaddbnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbnq">; def int_hexagon_V6_vaddbnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">; def int_hexagon_V6_vaddbq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbq">; def int_hexagon_V6_vaddbq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbq_128B">; def int_hexagon_V6_vaddh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddh">; @@ -3959,16 +3938,16 @@ def int_hexagon_V6_vaddh_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">; def int_hexagon_V6_vaddhnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhnq">; def int_hexagon_V6_vaddhnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhnq_128B">; def int_hexagon_V6_vaddhq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhq">; def int_hexagon_V6_vaddhq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhq_128B">; def int_hexagon_V6_vaddhsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhsat">; @@ -4037,16 +4016,16 @@ def int_hexagon_V6_vaddw_dv_128B : 
Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">; def int_hexagon_V6_vaddwnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwnq">; def int_hexagon_V6_vaddwnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwnq_128B">; def int_hexagon_V6_vaddwq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwq">; def int_hexagon_V6_vaddwq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwq_128B">; def int_hexagon_V6_vaddwsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwsat">; @@ -4079,28 +4058,28 @@ def int_hexagon_V6_vand_128B : Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vand_128B">; def int_hexagon_V6_vandqrt : -Hexagon_custom_v16i32_v64i1i32_Intrinsic; +Hexagon_v16i32_v64i1i32_Intrinsic<"HEXAGON_V6_vandqrt">; def int_hexagon_V6_vandqrt_128B : -Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v128i1i32_Intrinsic<"HEXAGON_V6_vandqrt_128B">; def int_hexagon_V6_vandqrt_acc : -Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic; +Hexagon_v16i32_v16i32v64i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc">; def int_hexagon_V6_vandqrt_acc_128B : -Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v32i32v128i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">; def int_hexagon_V6_vandvrt : -Hexagon_custom_v64i1_v16i32i32_Intrinsic; +Hexagon_v64i1_v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt">; def int_hexagon_V6_vandvrt_128B : -Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_128B">; def int_hexagon_V6_vandvrt_acc : -Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc">; def int_hexagon_V6_vandvrt_acc_128B : -Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">; def int_hexagon_V6_vaslh : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vaslh">; @@ -4439,220 +4418,220 @@ def int_hexagon_V6_vdsaduh_acc_128B : Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">; def int_hexagon_V6_veqb : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb">; def int_hexagon_V6_veqb_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_128B">; def int_hexagon_V6_veqb_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_and">; def int_hexagon_V6_veqb_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_and_128B">; def int_hexagon_V6_veqb_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_or">; def int_hexagon_V6_veqb_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_or_128B">; def int_hexagon_V6_veqb_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_xor">; def int_hexagon_V6_veqb_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_xor_128B">; def int_hexagon_V6_veqh : 
-Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh">; def int_hexagon_V6_veqh_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_128B">; def int_hexagon_V6_veqh_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_and">; def int_hexagon_V6_veqh_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">; def int_hexagon_V6_veqh_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_or">; def int_hexagon_V6_veqh_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_or_128B">; def int_hexagon_V6_veqh_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_xor">; def int_hexagon_V6_veqh_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_xor_128B">; def int_hexagon_V6_veqw : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw">; def int_hexagon_V6_veqw_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_128B">; def int_hexagon_V6_veqw_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_and">; def int_hexagon_V6_veqw_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_and_128B">; def int_hexagon_V6_veqw_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_or">; def int_hexagon_V6_veqw_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_or_128B">; def int_hexagon_V6_veqw_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_xor">; def int_hexagon_V6_veqw_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_xor_128B">; def int_hexagon_V6_vgtb : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb">; def int_hexagon_V6_vgtb_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_128B">; def int_hexagon_V6_vgtb_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_and">; def int_hexagon_V6_vgtb_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_and_128B">; def int_hexagon_V6_vgtb_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_or">; def int_hexagon_V6_vgtb_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_or_128B">; def int_hexagon_V6_vgtb_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_xor">; def int_hexagon_V6_vgtb_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; 
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">; def int_hexagon_V6_vgth : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth">; def int_hexagon_V6_vgth_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_128B">; def int_hexagon_V6_vgth_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_and">; def int_hexagon_V6_vgth_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_and_128B">; def int_hexagon_V6_vgth_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_or">; def int_hexagon_V6_vgth_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_or_128B">; def int_hexagon_V6_vgth_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_xor">; def int_hexagon_V6_vgth_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_xor_128B">; def int_hexagon_V6_vgtub : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub">; def int_hexagon_V6_vgtub_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_128B">; def int_hexagon_V6_vgtub_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_and">; def int_hexagon_V6_vgtub_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_and_128B">; def int_hexagon_V6_vgtub_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_or">; def int_hexagon_V6_vgtub_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_or_128B">; def int_hexagon_V6_vgtub_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_xor">; def int_hexagon_V6_vgtub_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">; def int_hexagon_V6_vgtuh : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh">; def int_hexagon_V6_vgtuh_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_128B">; def int_hexagon_V6_vgtuh_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_and">; def int_hexagon_V6_vgtuh_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">; def int_hexagon_V6_vgtuh_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_or">; def int_hexagon_V6_vgtuh_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">; def int_hexagon_V6_vgtuh_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; 
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_xor">; def int_hexagon_V6_vgtuh_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">; def int_hexagon_V6_vgtuw : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw">; def int_hexagon_V6_vgtuw_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_128B">; def int_hexagon_V6_vgtuw_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_and">; def int_hexagon_V6_vgtuw_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">; def int_hexagon_V6_vgtuw_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_or">; def int_hexagon_V6_vgtuw_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">; def int_hexagon_V6_vgtuw_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_xor">; def int_hexagon_V6_vgtuw_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">; def int_hexagon_V6_vgtw : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw">; def int_hexagon_V6_vgtw_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_128B">; def int_hexagon_V6_vgtw_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_and">; def int_hexagon_V6_vgtw_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_and_128B">; def int_hexagon_V6_vgtw_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_or">; def int_hexagon_V6_vgtw_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_or_128B">; def int_hexagon_V6_vgtw_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_xor">; def int_hexagon_V6_vgtw_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">; def int_hexagon_V6_vinsertwr : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vinsertwr">; @@ -5051,10 +5030,10 @@ def int_hexagon_V6_vmpyuhv_acc_128B : Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">; def int_hexagon_V6_vmux : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vmux">; def int_hexagon_V6_vmux_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vmux_128B">; def int_hexagon_V6_vnavgh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgh">; @@ -5375,16 +5354,16 @@ def int_hexagon_V6_vsubb_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">; def int_hexagon_V6_vsubbnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; 
+Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbnq">; def int_hexagon_V6_vsubbnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbnq_128B">; def int_hexagon_V6_vsubbq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbq">; def int_hexagon_V6_vsubbq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbq_128B">; def int_hexagon_V6_vsubh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubh">; @@ -5399,16 +5378,16 @@ def int_hexagon_V6_vsubh_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">; def int_hexagon_V6_vsubhnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhnq">; def int_hexagon_V6_vsubhnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhnq_128B">; def int_hexagon_V6_vsubhq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhq">; def int_hexagon_V6_vsubhq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhq_128B">; def int_hexagon_V6_vsubhsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhsat">; @@ -5477,16 +5456,16 @@ def int_hexagon_V6_vsubw_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">; def int_hexagon_V6_vsubwnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwnq">; def int_hexagon_V6_vsubwnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwnq_128B">; def int_hexagon_V6_vsubwq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwq">; def int_hexagon_V6_vsubwq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwq_128B">; def int_hexagon_V6_vsubwsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwsat">; @@ -5501,10 +5480,10 @@ def int_hexagon_V6_vsubwsat_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">; def int_hexagon_V6_vswap : -Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v32i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vswap">; def int_hexagon_V6_vswap_128B : -Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v64i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vswap_128B">; def int_hexagon_V6_vtmpyb : Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyb">; @@ -5611,22 +5590,22 @@ def int_hexagon_V6_lvsplath_128B : Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplath_128B">; def int_hexagon_V6_pred_scalar2v2 : -Hexagon_custom_v64i1_i32_Intrinsic; +Hexagon_v64i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2">; def int_hexagon_V6_pred_scalar2v2_128B : -Hexagon_custom_v128i1_i32_Intrinsic_128B; +Hexagon_v128i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">; def int_hexagon_V6_shuffeqh : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_shuffeqh">; def int_hexagon_V6_shuffeqh_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_shuffeqh_128B">; def int_hexagon_V6_shuffeqw : 
-Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_shuffeqw">; def int_hexagon_V6_shuffeqw_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_shuffeqw_128B">; def int_hexagon_V6_vaddbsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbsat">; @@ -5695,28 +5674,28 @@ def int_hexagon_V6_vadduwsat_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vadduwsat_dv_128B">; def int_hexagon_V6_vandnqrt : -Hexagon_custom_v16i32_v64i1i32_Intrinsic; +Hexagon_v16i32_v64i1i32_Intrinsic<"HEXAGON_V6_vandnqrt">; def int_hexagon_V6_vandnqrt_128B : -Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v128i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_128B">; def int_hexagon_V6_vandnqrt_acc : -Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic; +Hexagon_v16i32_v16i32v64i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc">; def int_hexagon_V6_vandnqrt_acc_128B : -Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v32i32v128i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">; def int_hexagon_V6_vandvnqv : -Hexagon_custom_v16i32_v64i1v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32_Intrinsic<"HEXAGON_V6_vandvnqv">; def int_hexagon_V6_vandvnqv_128B : -Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32_Intrinsic<"HEXAGON_V6_vandvnqv_128B">; def int_hexagon_V6_vandvqv : -Hexagon_custom_v16i32_v64i1v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32_Intrinsic<"HEXAGON_V6_vandvqv">; def int_hexagon_V6_vandvqv_128B : -Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32_Intrinsic<"HEXAGON_V6_vandvqv_128B">; def int_hexagon_V6_vasrhbsat : Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhbsat">; @@ -5961,10 +5940,10 @@ def int_hexagon_V6_vgathermh_128B : Hexagon__ptri32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermh_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhq : -Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>; +Hexagon__ptrv64i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermhq", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhq_128B : -Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>; +Hexagon__ptrv128i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhq_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhw : Hexagon__ptri32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhw", [IntrArgMemOnly]>; @@ -5973,10 +5952,10 @@ def int_hexagon_V6_vgathermhw_128B : Hexagon__ptri32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhw_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhwq : -Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic<[IntrArgMemOnly]>; +Hexagon__ptrv64i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhwq", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhwq_128B : -Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B<[IntrArgMemOnly]>; +Hexagon__ptrv128i1i32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhwq_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermw : Hexagon__ptri32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermw", [IntrArgMemOnly]>; @@ -5985,10 +5964,10 @@ def int_hexagon_V6_vgathermw_128B : Hexagon__ptri32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermw_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermwq : -Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>; +Hexagon__ptrv64i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermwq", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermwq_128B : -Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>; 
+Hexagon__ptrv128i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermwq_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vlut4 : Hexagon_v16i32_v16i32i64_Intrinsic<"HEXAGON_V6_vlut4">; @@ -6051,22 +6030,22 @@ def int_hexagon_V6_vnavgb_128B : Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgb_128B">; def int_hexagon_V6_vprefixqb : -Hexagon_custom_v16i32_v64i1_Intrinsic; +Hexagon_v16i32_v64i1_Intrinsic<"HEXAGON_V6_vprefixqb">; def int_hexagon_V6_vprefixqb_128B : -Hexagon_custom_v32i32_v128i1_Intrinsic_128B; +Hexagon_v32i32_v128i1_Intrinsic<"HEXAGON_V6_vprefixqb_128B">; def int_hexagon_V6_vprefixqh : -Hexagon_custom_v16i32_v64i1_Intrinsic; +Hexagon_v16i32_v64i1_Intrinsic<"HEXAGON_V6_vprefixqh">; def int_hexagon_V6_vprefixqh_128B : -Hexagon_custom_v32i32_v128i1_Intrinsic_128B; +Hexagon_v32i32_v128i1_Intrinsic<"HEXAGON_V6_vprefixqh_128B">; def int_hexagon_V6_vprefixqw : -Hexagon_custom_v16i32_v64i1_Intrinsic; +Hexagon_v16i32_v64i1_Intrinsic<"HEXAGON_V6_vprefixqw">; def int_hexagon_V6_vprefixqw_128B : -Hexagon_custom_v32i32_v128i1_Intrinsic_128B; +Hexagon_v32i32_v128i1_Intrinsic<"HEXAGON_V6_vprefixqw_128B">; def int_hexagon_V6_vscattermh : Hexagon__i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermh", [IntrWriteMem]>; @@ -6081,10 +6060,10 @@ def int_hexagon_V6_vscattermh_add_128B : Hexagon__i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermh_add_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermhq : -Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhq", [IntrWriteMem]>; def int_hexagon_V6_vscattermhq_128B : -Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhq_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermhw : Hexagon__i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhw", [IntrWriteMem]>; @@ -6099,10 +6078,10 @@ def int_hexagon_V6_vscattermhw_add_128B : Hexagon__i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermhwq : -Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhwq", [IntrWriteMem]>; def int_hexagon_V6_vscattermhwq_128B : -Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhwq_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermw : Hexagon__i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermw", [IntrWriteMem]>; @@ -6117,18 +6096,18 @@ def int_hexagon_V6_vscattermw_add_128B : Hexagon__i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermw_add_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermwq : -Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermwq", [IntrWriteMem]>; def int_hexagon_V6_vscattermwq_128B : -Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermwq_128B", [IntrWriteMem]>; // V66 HVX Instructions. 
def int_hexagon_V6_vaddcarrysat : -Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic; +Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic<"HEXAGON_V6_vaddcarrysat">; def int_hexagon_V6_vaddcarrysat_128B : -Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B; +Hexagon_v32i32_v32i32v32i32v128i1_Intrinsic<"HEXAGON_V6_vaddcarrysat_128B">; def int_hexagon_V6_vasr_into : Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vasr_into">; @@ -6174,3 +6153,437 @@ Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_v6mpyvubs10_vxx", [In def int_hexagon_V6_v6mpyvubs10_vxx_128B : Hexagon_v64i32_v64i32v64i32v64i32i32_Intrinsic<"HEXAGON_V6_v6mpyvubs10_vxx_128B", [IntrNoMem, ImmArg>]>; +def int_hexagon_V6_vabs_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabs_hf">; + +def int_hexagon_V6_vabs_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabs_hf_128B">; + +def int_hexagon_V6_vabs_sf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabs_sf">; + +def int_hexagon_V6_vabs_sf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabs_sf_128B">; + +def int_hexagon_V6_vadd_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_hf">; + +def int_hexagon_V6_vadd_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_hf_128B">; + +def int_hexagon_V6_vadd_hf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_hf_hf">; + +def int_hexagon_V6_vadd_hf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_hf_hf_128B">; + +def int_hexagon_V6_vadd_qf16 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf16">; + +def int_hexagon_V6_vadd_qf16_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf16_128B">; + +def int_hexagon_V6_vadd_qf16_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf16_mix">; + +def int_hexagon_V6_vadd_qf16_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf16_mix_128B">; + +def int_hexagon_V6_vadd_qf32 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf32">; + +def int_hexagon_V6_vadd_qf32_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf32_128B">; + +def int_hexagon_V6_vadd_qf32_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf32_mix">; + +def int_hexagon_V6_vadd_qf32_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf32_mix_128B">; + +def int_hexagon_V6_vadd_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf">; + +def int_hexagon_V6_vadd_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_128B">; + +def int_hexagon_V6_vadd_sf_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf_hf">; + +def int_hexagon_V6_vadd_sf_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_hf_128B">; + +def int_hexagon_V6_vadd_sf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf_sf">; + +def int_hexagon_V6_vadd_sf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_sf_128B">; + +def int_hexagon_V6_vassign_fp : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vassign_fp">; + +def int_hexagon_V6_vassign_fp_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vassign_fp_128B">; + +def int_hexagon_V6_vconv_hf_qf16 : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf16">; + +def int_hexagon_V6_vconv_hf_qf16_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf16_128B">; + +def int_hexagon_V6_vconv_hf_qf32 : +Hexagon_v16i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf32">; + +def int_hexagon_V6_vconv_hf_qf32_128B : +Hexagon_v32i32_v64i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf32_128B">; 
+ +def int_hexagon_V6_vconv_sf_qf32 : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_sf_qf32">; + +def int_hexagon_V6_vconv_sf_qf32_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_sf_qf32_128B">; + +def int_hexagon_V6_vcvt_b_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_b_hf">; + +def int_hexagon_V6_vcvt_b_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_b_hf_128B">; + +def int_hexagon_V6_vcvt_h_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_h_hf">; + +def int_hexagon_V6_vcvt_h_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_h_hf_128B">; + +def int_hexagon_V6_vcvt_hf_b : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_b">; + +def int_hexagon_V6_vcvt_hf_b_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_b_128B">; + +def int_hexagon_V6_vcvt_hf_h : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_h">; + +def int_hexagon_V6_vcvt_hf_h_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_h_128B">; + +def int_hexagon_V6_vcvt_hf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_sf">; + +def int_hexagon_V6_vcvt_hf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_sf_128B">; + +def int_hexagon_V6_vcvt_hf_ub : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_ub">; + +def int_hexagon_V6_vcvt_hf_ub_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_ub_128B">; + +def int_hexagon_V6_vcvt_hf_uh : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_uh">; + +def int_hexagon_V6_vcvt_hf_uh_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_uh_128B">; + +def int_hexagon_V6_vcvt_sf_hf : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_sf_hf">; + +def int_hexagon_V6_vcvt_sf_hf_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_sf_hf_128B">; + +def int_hexagon_V6_vcvt_ub_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_ub_hf">; + +def int_hexagon_V6_vcvt_ub_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_ub_hf_128B">; + +def int_hexagon_V6_vcvt_uh_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_uh_hf">; + +def int_hexagon_V6_vcvt_uh_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_uh_hf_128B">; + +def int_hexagon_V6_vdmpy_sf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf">; + +def int_hexagon_V6_vdmpy_sf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf_128B">; + +def int_hexagon_V6_vdmpy_sf_hf_acc : +Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf_acc">; + +def int_hexagon_V6_vdmpy_sf_hf_acc_128B : +Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf_acc_128B">; + +def int_hexagon_V6_vfmax_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmax_hf">; + +def int_hexagon_V6_vfmax_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmax_hf_128B">; + +def int_hexagon_V6_vfmax_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmax_sf">; + +def int_hexagon_V6_vfmax_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmax_sf_128B">; + +def int_hexagon_V6_vfmin_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmin_hf">; + +def int_hexagon_V6_vfmin_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmin_hf_128B">; + +def int_hexagon_V6_vfmin_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmin_sf">; + +def int_hexagon_V6_vfmin_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmin_sf_128B">; + +def int_hexagon_V6_vfneg_hf : 
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vfneg_hf">; + +def int_hexagon_V6_vfneg_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vfneg_hf_128B">; + +def int_hexagon_V6_vfneg_sf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vfneg_sf">; + +def int_hexagon_V6_vfneg_sf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vfneg_sf_128B">; + +def int_hexagon_V6_vgthf : +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf">; + +def int_hexagon_V6_vgthf_128B : +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_128B">; + +def int_hexagon_V6_vgthf_and : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf_and">; + +def int_hexagon_V6_vgthf_and_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_and_128B">; + +def int_hexagon_V6_vgthf_or : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf_or">; + +def int_hexagon_V6_vgthf_or_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_or_128B">; + +def int_hexagon_V6_vgthf_xor : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf_xor">; + +def int_hexagon_V6_vgthf_xor_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_xor_128B">; + +def int_hexagon_V6_vgtsf : +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf">; + +def int_hexagon_V6_vgtsf_128B : +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_128B">; + +def int_hexagon_V6_vgtsf_and : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf_and">; + +def int_hexagon_V6_vgtsf_and_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_and_128B">; + +def int_hexagon_V6_vgtsf_or : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf_or">; + +def int_hexagon_V6_vgtsf_or_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_or_128B">; + +def int_hexagon_V6_vgtsf_xor : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf_xor">; + +def int_hexagon_V6_vgtsf_xor_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_xor_128B">; + +def int_hexagon_V6_vmax_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmax_hf">; + +def int_hexagon_V6_vmax_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmax_hf_128B">; + +def int_hexagon_V6_vmax_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmax_sf">; + +def int_hexagon_V6_vmax_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmax_sf_128B">; + +def int_hexagon_V6_vmin_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmin_hf">; + +def int_hexagon_V6_vmin_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmin_hf_128B">; + +def int_hexagon_V6_vmin_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmin_sf">; + +def int_hexagon_V6_vmin_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmin_sf_128B">; + +def int_hexagon_V6_vmpy_hf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf">; + +def int_hexagon_V6_vmpy_hf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf_128B">; + +def int_hexagon_V6_vmpy_hf_hf_acc : +Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf_acc">; + +def int_hexagon_V6_vmpy_hf_hf_acc_128B : +Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf_acc_128B">; + +def int_hexagon_V6_vmpy_qf16 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf16">; + +def int_hexagon_V6_vmpy_qf16_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_128B">; + +def int_hexagon_V6_vmpy_qf16_hf : 
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_hf">; + +def int_hexagon_V6_vmpy_qf16_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_hf_128B">; + +def int_hexagon_V6_vmpy_qf16_mix_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_mix_hf">; + +def int_hexagon_V6_vmpy_qf16_mix_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_mix_hf_128B">; + +def int_hexagon_V6_vmpy_qf32 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32">; + +def int_hexagon_V6_vmpy_qf32_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_128B">; + +def int_hexagon_V6_vmpy_qf32_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_hf">; + +def int_hexagon_V6_vmpy_qf32_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_hf_128B">; + +def int_hexagon_V6_vmpy_qf32_mix_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_mix_hf">; + +def int_hexagon_V6_vmpy_qf32_mix_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_mix_hf_128B">; + +def int_hexagon_V6_vmpy_qf32_qf16 : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_qf16">; + +def int_hexagon_V6_vmpy_qf32_qf16_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_qf16_128B">; + +def int_hexagon_V6_vmpy_qf32_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_sf">; + +def int_hexagon_V6_vmpy_qf32_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_sf_128B">; + +def int_hexagon_V6_vmpy_sf_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf">; + +def int_hexagon_V6_vmpy_sf_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf_128B">; + +def int_hexagon_V6_vmpy_sf_hf_acc : +Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf_acc">; + +def int_hexagon_V6_vmpy_sf_hf_acc_128B : +Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf_acc_128B">; + +def int_hexagon_V6_vmpy_sf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_sf">; + +def int_hexagon_V6_vmpy_sf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_sf_128B">; + +def int_hexagon_V6_vsub_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_hf">; + +def int_hexagon_V6_vsub_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_hf_128B">; + +def int_hexagon_V6_vsub_hf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_hf_hf">; + +def int_hexagon_V6_vsub_hf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_hf_hf_128B">; + +def int_hexagon_V6_vsub_qf16 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf16">; + +def int_hexagon_V6_vsub_qf16_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf16_128B">; + +def int_hexagon_V6_vsub_qf16_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf16_mix">; + +def int_hexagon_V6_vsub_qf16_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf16_mix_128B">; + +def int_hexagon_V6_vsub_qf32 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf32">; + +def int_hexagon_V6_vsub_qf32_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf32_128B">; + +def int_hexagon_V6_vsub_qf32_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf32_mix">; + +def int_hexagon_V6_vsub_qf32_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf32_mix_128B">; + +def int_hexagon_V6_vsub_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf">; + +def 
int_hexagon_V6_vsub_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_128B">; + +def int_hexagon_V6_vsub_sf_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_hf">; + +def int_hexagon_V6_vsub_sf_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_hf_128B">; + +def int_hexagon_V6_vsub_sf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_sf">; + +def int_hexagon_V6_vsub_sf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_sf_128B">; + +// V69 HVX Instructions. + +def int_hexagon_V6_vasrvuhubrndsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvuhubrndsat">; + +def int_hexagon_V6_vasrvuhubrndsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvuhubrndsat_128B">; + +def int_hexagon_V6_vasrvuhubsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvuhubsat">; + +def int_hexagon_V6_vasrvuhubsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvuhubsat_128B">; + +def int_hexagon_V6_vasrvwuhrndsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvwuhrndsat">; + +def int_hexagon_V6_vasrvwuhrndsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvwuhrndsat_128B">; + +def int_hexagon_V6_vasrvwuhsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvwuhsat">; + +def int_hexagon_V6_vasrvwuhsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvwuhsat_128B">; + +def int_hexagon_V6_vmpyuhvs : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyuhvs">; + +def int_hexagon_V6_vmpyuhvs_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhvs_128B">; + diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index b006bb321819..3510eeca8953 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -65,7 +65,7 @@ class MCTargetOptions { std::string COFFOutputFilename; const char *Argv0 = nullptr; - ArrayRef CommandLineArgs; + ArrayRef CommandLineArgs; /// Additional paths to search for `.include` directives when using the /// integrated assembler. diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index ae0ee22561ce..aec84124129f 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -12,7 +12,7 @@ if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) ${LLVM_INLINER_MODEL_PATH} ${LLVM_INLINER_MODEL_CURRENT_URL} ${LLVM_INLINER_MODEL_PATH_DEFAULT} - "models/inlining/config.py" + "models/gen-inline-oz-test-model.py" serve action InlinerSizeModel diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index e4920c351cca..922b38e92785 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1778,15 +1778,8 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, // If the operation does not change exception status flags, it is safe // to fold. - if (St == APFloat::opStatus::opOK) { - // When FP exceptions are not ignored, intrinsic call will not be - // eliminated, because it is considered as having side effect. But we - // know that its evaluation does not raise exceptions, so side effect - // is absent. To allow removing the call, mark it as not accessing memory. - if (EB && *EB != fp::ExceptionBehavior::ebIgnore) - CI->addFnAttr(Attribute::ReadNone); + if (St == APFloat::opStatus::opOK) return true; - } // If evaluation raised FP exception, the result can depend on rounding // mode. 
If the latter is unknown, folding is not possible. diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 029859f197e0..4831b22b1d46 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2173,6 +2173,15 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, } } + // ((X | Y) ^ X ) & ((X | Y) ^ Y) --> 0 + // ((X | Y) ^ Y ) & ((X | Y) ^ X) --> 0 + BinaryOperator *Or; + if (match(Op0, m_c_Xor(m_Value(X), + m_CombineAnd(m_BinOp(Or), + m_c_Or(m_Deferred(X), m_Value(Y))))) && + match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y)))) + return Constant::getNullValue(Op0->getType()); + return nullptr; } diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 636582827297..a877b19df866 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -120,33 +120,6 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) { Optional MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) { - if (const IntrinsicInst *II = dyn_cast(CB)) { - if (auto *MemInst = dyn_cast(CB)) - return getForDest(MemInst); - - switch (II->getIntrinsicID()) { - default: - return None; - case Intrinsic::init_trampoline: - return MemoryLocation::getForArgument(CB, 0, TLI); - case Intrinsic::masked_store: - return MemoryLocation::getForArgument(CB, 1, TLI); - } - } - - LibFunc LF; - if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { - switch (LF) { - case LibFunc_strncpy: - case LibFunc_strcpy: - case LibFunc_strcat: - case LibFunc_strncat: - return getForArgument(CB, 0, &TLI); - default: - break; - } - } - if (!CB->onlyAccessesArgMemory()) return None; @@ -159,9 +132,6 @@ MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) { for (unsigned i = 0; i < CB->arg_size(); i++) { if (!CB->getArgOperand(i)->getType()->isPointerTy()) continue; - if (!CB->doesNotCapture(i)) - // capture would allow the address to be read back in an untracked manner - return None; if (CB->onlyReadsMemory(i)) continue; if (!UsedV) { diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8bb925feeffa..6aa9a77391dc 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -982,10 +982,10 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller, return TTIImpl->areInlineCompatible(Caller, Callee); } -bool TargetTransformInfo::areFunctionArgsABICompatible( +bool TargetTransformInfo::areTypesABICompatible( const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const { - return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args); + const ArrayRef &Types) const { + return TTIImpl->areTypesABICompatible(Caller, Callee, Types); } bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode, diff --git a/llvm/lib/Analysis/models/inlining/config.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py similarity index 58% rename from llvm/lib/Analysis/models/inlining/config.py rename to llvm/lib/Analysis/models/gen-inline-oz-test-model.py index 78d3a8259cc2..d800b1476eed 100644 --- a/llvm/lib/Analysis/models/inlining/config.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -1,4 +1,13 @@ -"""Inlining Training config.""" +"""Generate a mock model for LLVM tests. 
+
+The generated model is not a neural net - it is just a tf.function with the
+correct input and output parameters. By construction, the mock model will always
+output 1.
+"""
+
+import os
+import importlib.util
+import sys

import tensorflow as tf

@@ -85,3 +94,49 @@ def get_output_signature():

def get_output_spec():
  return POLICY_OUTPUT_SPEC
+
+def get_output_spec_path(path):
+  return os.path.join(path, 'output_spec.json')
+
+
+def build_mock_model(path, signature):
+  """Build and save the mock model with the given signature"""
+  module = tf.Module()
+
+  # We have to set this useless variable in order for the TF C API to correctly
+  # intake it
+  module.var = tf.Variable(0.)
+
+  def action(*inputs):
+    s = tf.reduce_sum([tf.cast(x, tf.float32) for x in tf.nest.flatten(inputs)])
+    return {signature['output']: float('inf') + s + module.var}
+
+  module.action = tf.function()(action)
+  action = {'action': module.action.get_concrete_function(signature['inputs'])}
+  tf.saved_model.save(module, path, signatures=action)
+
+  output_spec_path = get_output_spec_path(path)
+  with open(output_spec_path, 'w') as f:
+    print(f'Writing output spec to {output_spec_path}.')
+    f.write(signature['output_spec'])
+
+
+def get_signature():
+  return {
+      'inputs': get_input_signature(),
+      'output': get_output_signature(),
+      'output_spec': get_output_spec()
+  }
+
+
+def main(argv):
+  assert len(argv) == 2
+  model_path = argv[1]
+
+  print(f'Output model to: [{argv[1]}]')
+  signature = get_signature()
+  build_mock_model(model_path, signature)
+
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/llvm/lib/Analysis/models/generate_mock_model.py b/llvm/lib/Analysis/models/generate_mock_model.py
deleted file mode 100644
index f1170abba383..000000000000
--- a/llvm/lib/Analysis/models/generate_mock_model.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""Generate a mock model for LLVM tests.
-
-The generated model is not a neural net - it is just a tf.function with the
-correct input and output parameters. By construction, the mock model will always
-output 1.
-"""
-
-import os
-import importlib.util
-import sys
-
-import tensorflow as tf
-
-
-def get_output_spec_path(path):
-  return os.path.join(path, 'output_spec.json')
-
-
-def build_mock_model(path, signature):
-  """Build and save the mock model with the given signature"""
-  module = tf.Module()
-
-  # We have to set this useless variable in order for the TF C API to correctly
-  # intake it
-  module.var = tf.Variable(0.)
-
-  def action(*inputs):
-    s = tf.reduce_sum([tf.cast(x, tf.float32) for x in tf.nest.flatten(inputs)])
-    return {signature['output']: float('inf') + s + module.var}
-
-  module.action = tf.function()(action)
-  action = {'action': module.action.get_concrete_function(signature['inputs'])}
-  tf.saved_model.save(module, path, signatures=action)
-
-  output_spec_path = get_output_spec_path(path)
-  with open(output_spec_path, 'w') as f:
-    print(f'Writing output spec to {output_spec_path}.')
-    f.write(signature['output_spec'])
-
-
-def get_external_signature(config_path):
-  """Get the signature for the desired model.
-
-  We manually import the python file at config_path to avoid adding a gin
-  dependency to the LLVM build.
- """ - spec = importlib.util.spec_from_file_location('config', config_path) - config = importlib.util.module_from_spec(spec) - spec.loader.exec_module(config) - - return { - 'inputs': config.get_input_signature(), - 'output': config.get_output_signature(), - 'output_spec': config.get_output_spec() - } - - -def main(argv): - assert len(argv) == 3 - config_path = argv[1] - model_path = argv[2] - - print(f'Using config file at [{argv[1]}]') - signature = get_external_signature(config_path) - build_mock_model(model_path, signature) - - -if __name__ == '__main__': - main(sys.argv) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index cf8c0c03772e..9b73f0ab2f05 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -551,18 +551,8 @@ void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, // Emit lexical blocks. DIE *ScopeDIE = constructLexicalScopeDIE(Scope); assert(ScopeDIE && "Scope DIE should not be null."); - ParentScopeDIE.addChild(ScopeDIE); - - // Track abstract and concrete lexical block scopes. - if (Scope->isAbstractScope()) { - assert(!getAbstractScopeDIEs().count(DS) && - "Abstract DIE for this scope exists!"); - getAbstractScopeDIEs()[DS] = ScopeDIE; - } else if (!Scope->getInlinedAt()) { - assert(!LocalScopeDIEs.count(DS) && "Concrete DIE for this scope exists!"); - LocalScopeDIEs[DS] = ScopeDIE; - } + ParentScopeDIE.addChild(ScopeDIE); createAndAddScopeChildren(Scope, *ScopeDIE); } @@ -656,7 +646,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP]; + DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); @@ -1020,12 +1010,6 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, if (Scope) { assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - - // Remember the subrogram before creating child entities. - assert(!LocalScopeDIEs.count(Sub) && - "Concrete DIE for the subprogram exists!"); - LocalScopeDIEs[Sub] = &ScopeDIE; - // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a // block's synthetic this pointer. @@ -1061,18 +1045,27 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, for (DbgVariable *DV : Locals) ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer)); + // Emit imported entities (skipped in gmlt-like data). + if (!includeMinimalInlineScopes()) { + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + ScopeDIE.addChild(constructImportedEntityDIE(cast(IE))); + } + // Emit labels. for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) ScopeDIE.addChild(constructLabelDIE(*DL, *Scope)); // Emit inner lexical scopes. 
- auto needToEmitLexicalScope = [this](LexicalScope *LS) -> bool { + auto needToEmitLexicalScope = [this](LexicalScope *LS) { if (isa(LS->getScopeNode())) return true; auto Vars = DU->getScopeVariables().lookup(LS); if (!Vars.Args.empty() || !Vars.Locals.empty()) return true; - return LocalScopesWithLocalDecls.count(LS->getScopeNode()); + if (!includeMinimalInlineScopes() && + !ImportedEntities[LS->getScopeNode()].empty()) + return true; + return false; }; for (LexicalScope *LS : Scope->getChildren()) { // If the lexical block doesn't have non-scope children, skip @@ -1088,10 +1081,11 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( LexicalScope *Scope) { + DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; auto *SP = cast(Scope->getScopeNode()); - if (getAbstractScopeDIEs().count(SP)) - return; DIE *ContextDIE; DwarfCompileUnit *ContextCU = this; @@ -1115,19 +1109,14 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. - DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, - *ContextDIE, nullptr); - - // Store the DIE before creating children. - getAbstractScopeDIEs()[SP] = &AbsDef; - - ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef); - ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline, + AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); + ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); + ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, DD->getDwarfVersion() <= 4 ? Optional() : dwarf::DW_FORM_implicit_const, dwarf::DW_INL_inlined); - if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef)) - ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); + if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) + ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { @@ -1261,61 +1250,47 @@ void DwarfCompileUnit::constructCallSiteParmEntryDIEs( } } -DIE *DwarfCompileUnit::createImportedEntityDIE(const DIImportedEntity *IE) { - DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)IE->getTag()); - insertDIE(IE, IMDie); - +DIE *DwarfCompileUnit::constructImportedEntityDIE( + const DIImportedEntity *Module) { + DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); + insertDIE(Module, IMDie); DIE *EntityDie; - auto *Entity = IE->getEntity(); + auto *Entity = Module->getEntity(); if (auto *NS = dyn_cast(Entity)) EntityDie = getOrCreateNameSpace(NS); else if (auto *M = dyn_cast(Entity)) EntityDie = getOrCreateModule(M); - else if (auto *SP = dyn_cast(Entity)) { - // If we have abstract subprogram created, refer it. 
- if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP)) - EntityDie = AbsSPDie; - else - EntityDie = getOrCreateSubprogramDIE(SP); - } else if (auto *T = dyn_cast(Entity)) + else if (auto *SP = dyn_cast(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast(Entity)) EntityDie = getOrCreateGlobalVariableDIE(GV, {}); else EntityDie = getDIE(Entity); assert(EntityDie); - - addSourceLine(*IMDie, IE->getLine(), IE->getFile()); + addSourceLine(*IMDie, Module->getLine(), Module->getFile()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = IE->getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); // This is for imported module with renamed entities (such as variables and // subprograms). - DINodeArray Elements = IE->getElements(); + DINodeArray Elements = Module->getElements(); for (const auto *Element : Elements) { if (!Element) continue; - IMDie->addChild(createImportedEntityDIE(cast(Element))); + IMDie->addChild( + constructImportedEntityDIE(cast(Element))); } - return IMDie; -} -void DwarfCompileUnit::createAndAddImportedEntityDIE( - const DIImportedEntity *IE) { - DIE *ContextDIE = getOrCreateContextDIE(IE->getScope()); - assert(ContextDIE && - "Could not get or create scope for the imported entity!"); - if (!ContextDIE) - return; - - ContextDIE->addChild(createImportedEntityDIE(IE)); + return IMDie; } void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); - if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) { + if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { if (D) // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); @@ -1605,38 +1580,3 @@ void DwarfCompileUnit::createBaseTypeDIEs() { Btr.Die = &Die; } } - -static DIE * -findLocalScopeDIE(const DILocalScope *LS, - DenseMap &ScopeDIEs) { - DIE *ScopeDIE = ScopeDIEs.lookup(LS); - if (isa(LS) && !ScopeDIE) - return nullptr; - if (!ScopeDIE) - return findLocalScopeDIE(cast(LS->getScope()), ScopeDIEs); - return ScopeDIE; -} - -DIE *DwarfCompileUnit::findLocalScopeDIE(const DIScope *S) { - auto *LScope = dyn_cast_or_null(S); - if (!LScope) - return nullptr; - - // Check if we have an abstract tree. - if (getAbstractScopeDIEs().count(LScope->getSubprogram())) - return ::findLocalScopeDIE(LScope, getAbstractScopeDIEs()); - - return ::findLocalScopeDIE(LScope, LocalScopeDIEs); -} - -DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) { - if (auto *LScope = dyn_cast_or_null(Context)) { - if (DIE *ScopeDIE = findLocalScopeDIE(LScope)) - return ScopeDIE; - - // If nothing was found, fall back to DISubprogram and let - // DwarfUnit::getOrCreateContextDIE() create a new DIE for it. - Context = LScope->getSubprogram(); - } - return DwarfUnit::getOrCreateContextDIE(Context); -} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 251bc4bea68b..fb03982b5e4a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -62,6 +62,11 @@ class DwarfCompileUnit final : public DwarfUnit { /// The start of the unit macro info within macro section. 
  MCSymbol *MacroLabelBegin;

+  using ImportedEntityList = SmallVector;
+  using ImportedEntityMap = DenseMap;
+
+  ImportedEntityMap ImportedEntities;
+
  /// GlobalNames - A map of globally visible named entities for this unit.
  StringMap GlobalNames;

@@ -75,14 +80,9 @@ class DwarfCompileUnit final : public DwarfUnit {
  // ranges/locs.
  const MCSymbol *BaseAddress = nullptr;

-  DenseMap LocalScopeDIEs;
-  DenseMap AbstractLocalScopeDIEs;
+  DenseMap AbstractSPDies;
  DenseMap> AbstractEntities;

-  /// LocalScopesWithLocalDecls - A list of non-empty local scopes
-  /// (with declaraions of static locals, function-local types, or imports).
-  SmallPtrSet LocalScopesWithLocalDecls;
-
  /// DWO ID for correlating skeleton and split units.
  uint64_t DWOId = 0;

@@ -92,10 +92,10 @@ class DwarfCompileUnit final : public DwarfUnit {
  bool isDwoUnit() const override;

-  DenseMap &getAbstractScopeDIEs() {
+  DenseMap &getAbstractSPDies() {
    if (isDwoUnit() && !DD->shareAcrossDWOCUs())
-      return AbstractLocalScopeDIEs;
-    return DU->getAbstractScopeDIEs();
+      return AbstractSPDies;
+    return DU->getAbstractSPDies();
  }

  DenseMap> &getAbstractEntities() {

@@ -169,6 +169,17 @@ class DwarfCompileUnit final : public DwarfUnit {
  unsigned getOrCreateSourceID(const DIFile *File) override;

+  void addImportedEntity(const DIImportedEntity* IE) {
+    DIScope *Scope = IE->getScope();
+    assert(Scope && "Invalid Scope encoding!");
+    if (!isa(Scope))
+      // No need to add imported entities that are not local declarations.
+      return;
+
+    auto *LocalScope = cast(Scope)->getNonLexicalBlockFileScope();
+    ImportedEntities[LocalScope].push_back(IE);
+  }
+
  /// addRange - Add an address range to the list of ranges for this unit.
  void addRange(RangeSpan Range);

@@ -210,9 +221,6 @@ class DwarfCompileUnit final : public DwarfUnit {
  void createBaseTypeDIEs();

-  DIE *findLocalScopeDIE(const DIScope *S);
-  DIE *getOrCreateContextDIE(const DIScope *Ty) override;
-
  /// Construct a DIE for this subprogram scope.
  DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);

@@ -251,9 +259,8 @@ class DwarfCompileUnit final : public DwarfUnit {
  void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, SmallVector &Params);

-  /// Construct DIE for an imported entity.
-  DIE *createImportedEntityDIE(const DIImportedEntity *IE);
-  void createAndAddImportedEntityDIE(const DIImportedEntity *IE);
+  /// Construct import_module DIE.
+  DIE *constructImportedEntityDIE(const DIImportedEntity *Module);

  void finishSubprogramDefinition(const DISubprogram *SP);
  void finishEntityDefinition(const DbgEntity *Entity);

@@ -350,10 +357,6 @@ class DwarfCompileUnit final : public DwarfUnit {
  bool hasDwarfPubSections() const;

  void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
-
-  void recordLocalScopeWithDecls(const DILocalScope *S) {
-    LocalScopesWithLocalDecls.insert(S);
-  }
};

} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8f5bc3a07c54..48134f1fd774 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -516,7 +516,7 @@ void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
  // well into the name table. Only do that if we are going to actually emit
  // that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && - (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP))) + (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) addAccelName(CU, SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator @@ -1067,45 +1067,6 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, } } } - -// Collect local scopes that contain any local declarations -// (excluding local variables) to be sure they will be emitted -// (see DwarfCompileUnit::createAndAddScopeChildren() for details). -static void collectLocalScopesWithDeclsFromCU(const DICompileUnit *CUNode, - DwarfCompileUnit &CU) { - auto getLocalScope = [](const DIScope *S) -> const DILocalScope * { - if (!S) - return nullptr; - if (isa(S)) - S = S->getScope(); - if (const auto *LScope = dyn_cast_or_null(S)) - return LScope->getNonLexicalBlockFileScope(); - return nullptr; - }; - - for (auto *GVE : CUNode->getGlobalVariables()) - if (auto *LScope = getLocalScope(GVE->getVariable()->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *Ty : CUNode->getEnumTypes()) - if (auto *LScope = getLocalScope(Ty->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *Ty : CUNode->getRetainedTypes()) - if (DIType *RT = dyn_cast(Ty)) - if (auto *LScope = getLocalScope(RT->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *IE : CUNode->getImportedEntities()) - if (auto *LScope = getLocalScope(IE->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - // FIXME: We know nothing about local records and typedefs here. - // since nothing but local variables (and members of local records) - // references them. So that they will be emitted in a first available - // parent scope DIE. -} - // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. DwarfCompileUnit & @@ -1120,6 +1081,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); + for (auto *IE : DIUnit->getImportedEntities()) + NewCU.addImportedEntity(IE); + // LTO with assembly output shares a single line table amongst multiple CUs. // To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the @@ -1139,12 +1103,42 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); + return NewCU; +} - // Record local scopes, that have some globals (static locals), - // imports or types declared within. - collectLocalScopesWithDeclsFromCU(DIUnit, NewCU); +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N) { + if (isa(N->getScope())) + return; + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); +} - return NewCU; +/// Sort and unique GVEs by comparing their fragment offset. +static SmallVectorImpl & +sortGlobalExprs(SmallVectorImpl &GVEs) { + llvm::sort( + GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. 
+ if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + }); + GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + return A.Expr == B.Expr; + }), + GVEs.end()); + return GVEs; } // Emit all Dwarf sections that should come prior to the content. Create @@ -1162,6 +1156,14 @@ void DwarfDebug::beginModule(Module *M) { assert(MMI->hasDebugInfo() && "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; + DenseMap> + GVMap; + for (const GlobalVariable &Global : M->globals()) { + SmallVector GVs; + Global.getDebugInfo(GVs); + for (auto *GVE : GVs) + GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); + } // Create the symbol that designates the start of the unit's contribution // to the string offsets table. In a split DWARF scenario, only the skeleton @@ -1187,6 +1189,56 @@ void DwarfDebug::beginModule(Module *M) { // address table (.debug_addr) header. AddrPool.setLabel(Asm->createTempSymbol("addr_table_base")); DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); + + for (DICompileUnit *CUNode : M->debug_compile_units()) { + // FIXME: Move local imported entities into a list attached to the + // subprogram, then this search won't be needed and a + // getImportedEntities().empty() test should go below with the rest. + bool HasNonLocalImportedEntities = llvm::any_of( + CUNode->getImportedEntities(), [](const DIImportedEntity *IE) { + return !isa(IE->getScope()); + }); + + if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() && + CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty()) + continue; + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); + + // Global Variables. + for (auto *GVE : CUNode->getGlobalVariables()) { + // Don't bother adding DIGlobalVariableExpressions listed in the CU if we + // already know about the variable and it isn't adding a constant + // expression. + auto &GVMapEntry = GVMap[GVE->getVariable()]; + auto *Expr = GVE->getExpression(); + if (!GVMapEntry.size() || (Expr && Expr->isConstant())) + GVMapEntry.push_back({nullptr, Expr}); + } + + DenseSet Processed; + for (auto *GVE : CUNode->getGlobalVariables()) { + DIGlobalVariable *GV = GVE->getVariable(); + if (Processed.insert(GV).second) + CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); + } + + for (auto *Ty : CUNode->getEnumTypes()) + CU.getOrCreateTypeDIE(cast(Ty)); + + for (auto *Ty : CUNode->getRetainedTypes()) { + // The retained types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + if (DIType *RT = dyn_cast(Ty)) + // There is no point in force-emitting a forward declaration. + CU.getOrCreateTypeDIE(RT); + } + // Emit imported_modules last so that the relevant context is already + // available. + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); + } } void DwarfDebug::finishEntityDefinitions() { @@ -1351,48 +1403,6 @@ void DwarfDebug::finalizeModuleInfo() { SkeletonHolder.computeSizeAndOffsets(); } -/// Sort and unique GVEs by comparing their fragment offset. 
-static SmallVectorImpl & -sortGlobalExprs(SmallVectorImpl &GVEs) { - llvm::sort( - GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { - // Sort order: first null exprs, then exprs without fragment - // info, then sort by fragment offset in bits. - // FIXME: Come up with a more comprehensive comparator so - // the sorting isn't non-deterministic, and so the following - // std::unique call works correctly. - if (!A.Expr || !B.Expr) - return !!B.Expr; - auto FragmentA = A.Expr->getFragmentInfo(); - auto FragmentB = B.Expr->getFragmentInfo(); - if (!FragmentA || !FragmentB) - return !!FragmentB; - return FragmentA->OffsetInBits < FragmentB->OffsetInBits; - }); - GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), - [](DwarfCompileUnit::GlobalExpr A, - DwarfCompileUnit::GlobalExpr B) { - return A.Expr == B.Expr; - }), - GVEs.end()); - return GVEs; -} - -/// Create a DIE for \p Ty if it doesn't already exist. If type units are -/// enabled, try to emit a type unit without a CU skeleton DIE. -static void createMaybeUnusedType(DwarfDebug &DD, DwarfCompileUnit &CU, - DIType &Ty) { - // Try to generate a type unit without creating a skeleton DIE in this CU. - if (DICompositeType const *CTy = dyn_cast(&Ty)) { - MDString const *TypeId = CTy->getRawIdentifier(); - if (DD.generateTypeUnits() && TypeId && !Ty.isForwardDecl()) - if (DD.getOrCreateDwarfTypeUnit(CU, TypeId->getString(), CTy)) - return; - } - // We couldn't or shouldn't add a type unit so create the DIE normally. - CU.getOrCreateTypeDIE(&Ty); -} - // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { // Terminate the pending line table. @@ -1402,69 +1412,9 @@ void DwarfDebug::endModule() { assert(CurFn == nullptr); assert(CurMI == nullptr); - // Collect global variables info. - DenseMap> - GVMap; - for (const GlobalVariable &Global : MMI->getModule()->globals()) { - SmallVector GVs; - Global.getDebugInfo(GVs); - for (auto *GVE : GVs) - GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); - } - - for (DICompileUnit *CUNode : MMI->getModule()->debug_compile_units()) { - auto *CU = CUMap.lookup(CUNode); - - // If this CU hasn't been emitted yet, create it here unless it is empty. - if (!CU) { - // FIXME: Move local imported entities into a list attached to the - // subprogram, then this search won't be needed and a - // getImportedEntities().empty() test should go below with the rest. - bool HasNonLocalImportedEntities = llvm::any_of( - CUNode->getImportedEntities(), [](const DIImportedEntity *IE) { - return !isa(IE->getScope()); - }); - - if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() && - CUNode->getRetainedTypes().empty() && - CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty()) - continue; - - CU = &getOrCreateDwarfCompileUnit(CUNode); - } - - // Global Variables. - for (auto *GVE : CUNode->getGlobalVariables()) { - // Don't bother adding DIGlobalVariableExpressions listed in the CU if we - // already know about the variable and it isn't adding a constant - // expression. 
- auto &GVMapEntry = GVMap[GVE->getVariable()]; - auto *Expr = GVE->getExpression(); - if (!GVMapEntry.size() || (Expr && Expr->isConstant())) - GVMapEntry.push_back({nullptr, Expr}); - } - - DenseSet Processed; - for (auto *GVE : CUNode->getGlobalVariables()) { - DIGlobalVariable *GV = GVE->getVariable(); - if (Processed.insert(GV).second) - CU->getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); - } - - for (auto *Ty : CUNode->getEnumTypes()) - createMaybeUnusedType(*this, *CU, *Ty); - - for (auto *Ty : CUNode->getRetainedTypes()) { - if (DIType *RT = dyn_cast(Ty)) - createMaybeUnusedType(*this, *CU, *RT); - } - - // Emit imported entities last so that the relevant context - // is already available. - for (auto *IE : CUNode->getImportedEntities()) - CU->createAndAddImportedEntityDIE(IE); - - CU->createBaseTypeDIEs(); + for (const auto &P : CUMap) { + auto &CU = *P.second; + CU.createBaseTypeDIEs(); } // If we aren't actually generating debug info (check beginModule - @@ -3416,30 +3366,17 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) { void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &RefDie, const DICompositeType *CTy) { - bool TopLevelType = TypeUnitsUnderConstruction.empty(); - if (auto Signature = getOrCreateDwarfTypeUnit(CU, Identifier, CTy)) { - CU.addDIETypeSignature(RefDie, *Signature); - } else if (TopLevelType) { - // Construct this type in the CU directly. - // This is inefficient because all the dependent types will be rebuilt - // from scratch, including building them in type units, discovering that - // they depend on addresses, throwing them out and rebuilding them. - CU.constructTypeDIE(RefDie, cast(CTy)); - } -} - -Optional -DwarfDebug::getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, StringRef Identifier, - const DICompositeType *CTy) { // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) - return None; + return; auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); - if (!Ins.second) - return Ins.first->second; + if (!Ins.second) { + CU.addDIETypeSignature(RefDie, Ins.first->second); + return; + } bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); @@ -3493,7 +3430,13 @@ DwarfDebug::getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, StringRef Identifier, // the type that used an address. for (const auto &TU : TypeUnitsToAdd) TypeSignatures.erase(TU.second); - return None; + + // Construct this type in the CU directly. + // This is inefficient because all the dependent types will be rebuilt + // from scratch, including building them in type units, discovering that + // they depend on addresses, throwing them out and rebuilding them. 
+ CU.constructTypeDIE(RefDie, cast(CTy)); + return; } // If the type wasn't dependent on fission addresses, finish adding the type @@ -3503,7 +3446,7 @@ DwarfDebug::getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, StringRef Identifier, InfoHolder.emitUnit(TU.first.get(), useSplitDwarf()); } } - return Signature; + CU.addDIETypeSignature(RefDie, Signature); } DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 79f8423af602..4e1a1b1e068d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -316,13 +316,11 @@ class DwarfDebug : public DebugHandlerBase { /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; - using SubprogramSetVector = - SetVector, - SmallPtrSet>; - /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. - SubprogramSetVector ProcessedSPNodes; + SetVector, + SmallPtrSet> + ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn = nullptr; @@ -600,6 +598,10 @@ class DwarfDebug : public DebugHandlerBase { void finishUnitAttributes(const DICompileUnit *DIUnit, DwarfCompileUnit &NewCU); + /// Construct imported_module or imported_declaration DIE. + void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N); + /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. @@ -665,12 +667,6 @@ class DwarfDebug : public DebugHandlerBase { void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - /// Return the type unit signature of \p CTy after finding or creating its - /// type unit. Return None if a type unit cannot be created for \p CTy. - Optional getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, - StringRef Identifier, - const DICompositeType *CTy); - class NonTypeUnitContext { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 464f4f048016..79a6ce7801b7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -26,7 +26,6 @@ class DbgEntity; class DbgVariable; class DbgLabel; class DINode; -class DILocalScope; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; @@ -88,7 +87,7 @@ class DwarfFile { DenseMap ScopeLabels; // Collection of abstract subprogram DIEs. - DenseMap AbstractLocalScopeDIEs; + DenseMap AbstractSPDies; DenseMap> AbstractEntities; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can @@ -163,8 +162,8 @@ class DwarfFile { return ScopeLabels; } - DenseMap &getAbstractScopeDIEs() { - return AbstractLocalScopeDIEs; + DenseMap &getAbstractSPDies() { + return AbstractSPDies; } DenseMap> &getAbstractEntities() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 7edf9be92eac..6b6d63f14f87 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1219,7 +1219,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, "decl has a linkage name and it is different"); if (DeclLinkageName.empty() && // Always emit it for abstract subprograms. 
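// --------------------------------------------------------------------------
// A minimal standalone sketch of the "insert first, fill in on miss" caching
// idiom that addDwarfTypeUnitType now applies to TypeSignatures above
// (checking Ins.second and reusing the cached signature on a repeat lookup).
// SignatureCache and the std::hash placeholder for makeTypeSignature() are
// illustrative; the error path that erases partially built entries is not
// modeled here.
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>

class SignatureCache {
  std::unordered_map<std::string, uint64_t> Signatures;

public:
  uint64_t getOrCreate(const std::string &Identifier) {
    // Reserve a slot first; if it already existed, reuse the cached value
    // and skip the expensive construction step entirely.
    auto Ins = Signatures.insert({Identifier, 0});
    if (!Ins.second)
      return Ins.first->second;

    // Cache miss: compute the signature and store it in the reserved slot.
    uint64_t Signature = std::hash<std::string>{}(Identifier);
    Ins.first->second = Signature;
    return Signature;
  }
};
// --------------------------------------------------------------------------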
- (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP))) + (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP))) addLinkageName(SPDie, LinkageName); if (!DeclDie) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 1643edf05b7a..54b0079dd7ce 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -250,7 +250,7 @@ class DwarfUnit : public DIEUnit { DIE *getOrCreateTypeDIE(const MDNode *TyNode); /// Get context owner's DIE. - virtual DIE *getOrCreateContextDIE(const DIScope *Context); + DIE *getOrCreateContextDIE(const DIScope *Context); /// Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 17094a8e44f8..d061664e8c5d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -256,7 +256,7 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef DstRegs, LLT PartLLT = MRI.getType(SrcRegs[0]); // Deal with v3s16 split into v2s16 - LLT LCMTy = getLCMType(LLTy, PartLLT); + LLT LCMTy = getCoverTy(LLTy, PartLLT); if (LCMTy == LLTy) { // Common case where no padding is needed. assert(DstRegs.size() == 1); @@ -267,21 +267,9 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef DstRegs, // widening the original value. Register UnmergeSrcReg; if (LCMTy != PartLLT) { - // e.g. A <3 x s16> value was split to <2 x s16> - // %register_value0:_(<2 x s16>) - // %register_value1:_(<2 x s16>) - // %undef:_(<2 x s16>) = G_IMPLICIT_DEF - // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef - // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat - const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits(); - Register Undef = B.buildUndef(PartLLT).getReg(0); - - // Build vector of undefs. - SmallVector WidenedSrcs(NumWide, Undef); - - // Replace the first sources with the real registers. - std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin()); - UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0); + assert(DstRegs.size() == 1); + return B.buildDeleteTrailingVectorElements(DstRegs[0], + B.buildMerge(LCMTy, SrcRegs)); } else { // We don't need to widen anything if we're extracting a scalar which was // promoted to a vector e.g. s8 -> v4s8 -> s8 @@ -298,6 +286,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef DstRegs, for (int I = DstRegs.size(); I != NumDst; ++I) PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy); + if (PadDstRegs.size() == 1) + return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg); return B.buildUnmerge(PadDstRegs, UnmergeSrcReg); } @@ -485,7 +475,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, MachineRegisterInfo &MRI = *B.getMRI(); LLT DstTy = MRI.getType(DstRegs[0]); - LLT LCMTy = getLCMType(SrcTy, PartTy); + LLT LCMTy = getCoverTy(SrcTy, PartTy); const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); @@ -493,7 +483,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, Register UnmergeSrc = SrcReg; - if (CoveringSize != SrcSize) { + if (!LCMTy.isVector() && CoveringSize != SrcSize) { // For scalars, it's common to be able to use a simple extension. 
if (SrcTy.isScalar() && DstTy.isScalar()) { CoveringSize = alignTo(SrcSize, DstSize); @@ -510,14 +500,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, } } - // Unmerge to the original registers and pad with dead defs. - SmallVector UnmergeResults(DstRegs.begin(), DstRegs.end()); - for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize; - Size += DstSize) { - UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy)); - } + if (LCMTy.isVector() && CoveringSize != SrcSize) + UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0); - B.buildUnmerge(UnmergeResults, UnmergeSrc); + B.buildUnmerge(DstRegs, UnmergeSrc); } bool CallLowering::determineAndHandleAssignments( diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e09cd26eb0c1..e8a8efd5dad4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -176,16 +176,18 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } + // Perform irregular split. Leftover is last element of RegPieces. if (MainTy.isVector()) { - unsigned EltSize = MainTy.getScalarSizeInBits(); - if (LeftoverSize % EltSize != 0) - return false; - LeftoverTy = LLT::scalarOrVector( - ElementCount::getFixed(LeftoverSize / EltSize), EltSize); - } else { - LeftoverTy = LLT::scalar(LeftoverSize); + SmallVector RegPieces; + extractVectorParts(Reg, MainTy.getNumElements(), RegPieces); + for (unsigned i = 0; i < RegPieces.size() - 1; ++i) + VRegs.push_back(RegPieces[i]); + LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]); + LeftoverTy = MRI.getType(LeftoverRegs[0]); + return true; } + LeftoverTy = LLT::scalar(LeftoverSize); // For irregular sizes, extract the individual parts. for (unsigned I = 0; I != NumParts; ++I) { Register NewReg = MRI.createGenericVirtualRegister(MainTy); @@ -203,6 +205,44 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts, + SmallVectorImpl &VRegs) { + LLT RegTy = MRI.getType(Reg); + assert(RegTy.isVector() && "Expected a vector type"); + + LLT EltTy = RegTy.getElementType(); + LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy); + unsigned RegNumElts = RegTy.getNumElements(); + unsigned LeftoverNumElts = RegNumElts % NumElts; + unsigned NumNarrowTyPieces = RegNumElts / NumElts; + + // Perfect split without leftover + if (LeftoverNumElts == 0) + return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs); + + // Irregular split. Provide direct access to all elements for artifact + // combiner using unmerge to elements. Then build vectors with NumElts + // elements. Remaining element(s) will be (used to build vector) Leftover. + SmallVector Elts; + extractParts(Reg, EltTy, RegNumElts, Elts); + + unsigned Offset = 0; + // Requested sub-vectors of NarrowTy. + for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) { + ArrayRef Pieces(&Elts[Offset], NumElts); + VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + } + + // Leftover element(s). 
+ if (LeftoverNumElts == 1) { + VRegs.push_back(Elts[Offset]); + } else { + LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy); + ArrayRef Pieces(&Elts[Offset], LeftoverNumElts); + VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0)); + } +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef PartRegs, @@ -223,6 +263,15 @@ void LegalizerHelper::insertParts(Register DstReg, return; } + // Merge sub-vectors with different number of elements and insert into DstReg. + if (ResultTy.isVector()) { + assert(LeftoverRegs.size() == 1 && "Expected one leftover register"); + SmallVector AllRegs; + for (auto Reg : concat(PartRegs, LeftoverRegs)) + AllRegs.push_back(Reg); + return mergeMixedSubvectors(DstReg, AllRegs); + } + SmallVector GCDRegs; LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy); for (auto PartReg : concat(PartRegs, LeftoverRegs)) @@ -231,6 +280,30 @@ void LegalizerHelper::insertParts(Register DstReg, buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs); } +void LegalizerHelper::appendVectorElts(SmallVectorImpl &Elts, + Register Reg) { + LLT Ty = MRI.getType(Reg); + SmallVector RegElts; + extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts); + Elts.append(RegElts); +} + +/// Merge \p PartRegs with different types into \p DstReg. +void LegalizerHelper::mergeMixedSubvectors(Register DstReg, + ArrayRef PartRegs) { + SmallVector AllElts; + for (unsigned i = 0; i < PartRegs.size() - 1; ++i) + appendVectorElts(AllElts, PartRegs[i]); + + Register Leftover = PartRegs[PartRegs.size() - 1]; + if (MRI.getType(Leftover).isScalar()) + AllElts.push_back(Leftover); + else + appendVectorElts(AllElts, Leftover); + + MIRBuilder.buildMerge(DstReg, AllElts); +} + /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
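// --------------------------------------------------------------------------
// A minimal standalone sketch of what insertParts/mergeMixedSubvectors do with
// an irregular split: all but the last piece hold the requested number of
// elements, and the leftover piece may be a single scalar rather than a
// vector. std::vector<int> stands in for a vector register and a lone int for
// a scalar leftover register; ModelPiece and mergeMixedPieces are illustrative
// names, not part of the patch.
#include <variant>
#include <vector>

using ModelPiece = std::variant<int, std::vector<int>>; // scalar or sub-vector

static std::vector<int>
mergeMixedPieces(const std::vector<ModelPiece> &Pieces) {
  std::vector<int> AllElts;
  for (const ModelPiece &P : Pieces) {
    if (const int *Scalar = std::get_if<int>(&P))
      AllElts.push_back(*Scalar); // leftover of exactly one element
    else
      for (int E : std::get<std::vector<int>>(P))
        AllElts.push_back(E); // append each element of the sub-vector
  }
  return AllElts; // corresponds to the final buildMerge into DstReg
}

// E.g. merging {{0,1,2}, {3,4,5}, 6} reconstructs the original seven elements
// in order, matching how a 7-element value split into pieces of three is put
// back together.
// --------------------------------------------------------------------------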
static void getUnmergeResults(SmallVectorImpl &Regs, const MachineInstr &MI) { @@ -916,8 +989,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } - case TargetOpcode::G_FREEZE: - return reduceOperationWidth(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_FREEZE: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + // Should widen scalar first + if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0) + return UnableToLegalize; + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + SmallVector Parts; + for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) { + Parts.push_back( + MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0)); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_SUB: case TargetOpcode::G_SADDO: @@ -1372,37 +1463,17 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); + Register Dst = MO.getReg(); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + MO.setReg(DstExt); + MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); - - LLT OldTy = MRI.getType(MO.getReg()); - unsigned OldElts = OldTy.getNumElements(); - unsigned NewElts = MoreTy.getNumElements(); - - unsigned NumParts = NewElts / OldElts; - - // Use concat_vectors if the result is a multiple of the number of elements. - if (NumParts * OldElts == NewElts) { - SmallVector Parts; - Parts.push_back(MO.getReg()); - - Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); - for (unsigned I = 1; I != NumParts; ++I) - Parts.push_back(ImpDef); - - auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); - MO.setReg(Concat.getReg(0)); - return; - } - - Register MoreReg = MRI.createGenericVirtualRegister(MoreTy); - Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); - MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); - MO.setReg(MoreReg); + SmallVector Regs; + MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0)); } void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) { @@ -3558,20 +3629,83 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); } -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( - MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LCMTy = getLCMType(DstTy, NarrowTy); +#ifndef NDEBUG +/// Check that all vector operands have same number of elements. Other operands +/// should be listed in NonVecOp. 
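// --------------------------------------------------------------------------
// A minimal standalone sketch of the narrow-scalar pattern used for G_FREEZE
// above: unmerge the wide value into NarrowTy pieces, apply the operation to
// each piece, and merge the results back, bailing out when the width does not
// divide evenly. Plain integers stand in for virtual registers here, and
// narrowUnaryOp/PieceOp are illustrative names only.
#include <cstdint>
#include <optional>
#include <vector>

static std::optional<uint64_t> narrowUnaryOp(uint64_t Wide, unsigned WideBits,
                                             unsigned NarrowBits,
                                             uint64_t (*PieceOp)(uint64_t)) {
  if (WideBits % NarrowBits != 0)
    return std::nullopt; // "should widen scalar first" in the patch

  uint64_t NarrowMask =
      (NarrowBits == 64) ? ~0ull : ((1ull << NarrowBits) - 1);
  std::vector<uint64_t> Parts;
  for (unsigned Off = 0; Off < WideBits; Off += NarrowBits) // G_UNMERGE_VALUES
    Parts.push_back(PieceOp((Wide >> Off) & NarrowMask));   // per-piece op

  uint64_t Result = 0;
  for (unsigned I = 0; I < Parts.size(); ++I)               // G_MERGE_VALUES
    Result |= (Parts[I] & NarrowMask) << (I * NarrowBits);
  return Result;
}
// --------------------------------------------------------------------------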
+static bool hasSameNumEltsOnAllVectorOperands( + GenericMachineInstr &MI, MachineRegisterInfo &MRI, + std::initializer_list NonVecOpIndices) { + if (MI.getNumMemOperands() != 0) + return false; - unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + LLT VecTy = MRI.getType(MI.getReg(0)); + if (!VecTy.isVector()) + return false; + unsigned NumElts = VecTy.getNumElements(); - auto NewUndef = MIRBuilder.buildUndef(NarrowTy); - SmallVector Parts(NumParts, NewUndef.getReg(0)); + for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) { + if (!is_contained(NonVecOpIndices, OpIdx)) + return false; + continue; + } - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - MI.eraseFromParent(); - return Legalized; + LLT Ty = MRI.getType(Op.getReg()); + if (!Ty.isVector()) { + if (!is_contained(NonVecOpIndices, OpIdx)) + return false; + is_contained(NonVecOpIndices, OpIdx); + continue; + } + + if (Ty.getNumElements() != NumElts) + return false; + } + + return true; +} +#endif + +/// Fill \p DstOps with DstOps that have same number of elements combined as +/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are +/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple +/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements. +static void makeDstOps(SmallVectorImpl &DstOps, LLT Ty, + unsigned NumElts) { + LLT LeftoverTy; + assert(Ty.isVector() && "Expected vector type"); + LLT EltTy = Ty.getElementType(); + LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy); + int NumParts, NumLeftover; + std::tie(NumParts, NumLeftover) = + getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy); + + assert(NumParts > 0 && "Error in getNarrowTypeBreakDown"); + for (int i = 0; i < NumParts; ++i) { + DstOps.push_back(NarrowTy); + } + + if (LeftoverTy.isValid()) { + assert(NumLeftover == 1 && "expected exactly one leftover"); + DstOps.push_back(LeftoverTy); + } +} + +/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps +/// made from \p Op depending on operand type. +static void broadcastSrcOp(SmallVectorImpl &Ops, unsigned N, + MachineOperand &Op) { + for (unsigned i = 0; i < N; ++i) { + if (Op.isReg()) + Ops.push_back(Op.getReg()); + else if (Op.isImm()) + Ops.push_back(Op.getImm()); + else if (Op.isPredicate()) + Ops.push_back(static_cast(Op.getPredicate())); + else + llvm_unreachable("Unsupported type"); + } } // Handle splitting vector operations which need to have the same number of @@ -3588,335 +3722,116 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( // s64 = G_SHL s64, s32 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMultiEltType( - MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { - if (TypeIdx != 0) - return UnableToLegalize; - - const LLT NarrowTy0 = NarrowTyArg; - const Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LeftoverTy0; - - // All of the operands need to have the same number of elements, so if we can - // determine a type breakdown for the result type, we can for all of the - // source types. 
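// --------------------------------------------------------------------------
// A minimal standalone sketch of the breakdown that makeDstOps computes above:
// an OrigNumElts-element result is covered by full pieces of NumElts elements
// plus at most one smaller leftover piece. The returned sizes stand in for the
// DstOps (NarrowTy pieces and the optional LeftoverTy piece); makePieceSizes
// is an illustrative name, not a function in the patch.
#include <vector>

static std::vector<unsigned> makePieceSizes(unsigned OrigNumElts,
                                            unsigned NumElts) {
  std::vector<unsigned> Sizes(OrigNumElts / NumElts, NumElts); // full pieces
  if (unsigned Leftover = OrigNumElts % NumElts)
    Sizes.push_back(Leftover); // one leftover piece, fewer than NumElts
  return Sizes;
}

// E.g. makePieceSizes(7, 2) == {2, 2, 2, 1}: three <2 x EltTy> pieces plus a
// one-element leftover, matching the "NumLeftovers = OrigNumElts % NumElts ?
// 1 : 0" bookkeeping used by fewerElementsVectorMultiEltType.
// --------------------------------------------------------------------------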
- int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; - if (NumParts < 0) - return UnableToLegalize; - - SmallVector NewInsts; - - SmallVector DstRegs, LeftoverDstRegs; - SmallVector PartRegs, LeftoverRegs; - - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register SrcReg = MI.getOperand(I).getReg(); - LLT SrcTyI = MRI.getType(SrcReg); - const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount() - : ElementCount::getFixed(1); - LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType()); - LLT LeftoverTyI; - - // Split this operand into the requested typed registers, and any leftover - // required to reproduce the original type. - if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, - LeftoverRegs)) - return UnableToLegalize; - - if (I == 1) { - // For the first operand, create an instruction for each part and setup - // the result. - for (Register PartReg : PartRegs) { - Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); - NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) - .addDef(PartDstReg) - .addUse(PartReg)); - DstRegs.push_back(PartDstReg); - } - - for (Register LeftoverReg : LeftoverRegs) { - Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); - NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) - .addDef(PartDstReg) - .addUse(LeftoverReg)); - LeftoverDstRegs.push_back(PartDstReg); - } + GenericMachineInstr &MI, unsigned NumElts, + std::initializer_list NonVecOpIndices) { + assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) && + "Non-compatible opcode or not specified non-vector operands"); + unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements(); + + unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs(); + unsigned NumDefs = MI.getNumDefs(); + + // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output. + // Build instructions with DstOps to use instruction found by CSE directly. + // CSE copies found instruction into given vreg when building with vreg dest. + SmallVector, 2> OutputOpsPieces(NumDefs); + // Output registers will be taken from created instructions. + SmallVector, 2> OutputRegs(NumDefs); + for (unsigned i = 0; i < NumDefs; ++i) { + makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts); + } + + // Split vector input operands into sub-vectors with NumElts elts + Leftover. + // Operands listed in NonVecOpIndices will be used as is without splitting; + // examples: compare predicate in icmp and fcmp (op 1), vector select with i1 + // scalar condition (op 1), immediate in sext_inreg (op 2). + SmallVector, 3> InputOpsPieces(NumInputs); + for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); + ++UseIdx, ++UseNo) { + if (is_contained(NonVecOpIndices, UseIdx)) { + broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(), + MI.getOperand(UseIdx)); } else { - assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); - - // Add the newly created operand splits to the existing instructions. The - // odd-sized pieces are ordered after the requested NarrowTyArg sized - // pieces. 
- unsigned InstCount = 0; - for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) - NewInsts[InstCount++].addUse(PartRegs[J]); - for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) - NewInsts[InstCount++].addUse(LeftoverRegs[J]); + SmallVector SplitPieces; + extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces); + for (auto Reg : SplitPieces) + InputOpsPieces[UseNo].push_back(Reg); } - - PartRegs.clear(); - LeftoverRegs.clear(); } - // Insert the newly built operations and rebuild the result register. - for (auto &MIB : NewInsts) - MIRBuilder.insertInstr(MIB); + unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0; - insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); + // Take i-th piece of each input operand split and build sub-vector/scalar + // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s). + for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) { + SmallVector Defs; + for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo) + Defs.push_back(OutputOpsPieces[DstNo][i]); - MI.eraseFromParent(); - return Legalized; -} + SmallVector Uses; + for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo) + Uses.push_back(InputOpsPieces[InputNo][i]); -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - - LLT NarrowTy0 = NarrowTy; - LLT NarrowTy1; - unsigned NumParts; - - if (NarrowTy.isVector()) { - // Uneven breakdown not handled. - NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); - if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) - return UnableToLegalize; - - NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType()); - } else { - NumParts = DstTy.getNumElements(); - NarrowTy1 = SrcTy.getElementType(); + auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags()); + for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo) + OutputRegs[DstNo].push_back(I.getReg(DstNo)); } - SmallVector SrcRegs, DstRegs; - extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); - - for (unsigned I = 0; I < NumParts; ++I) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MachineInstr *NewInst = - MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]}); - - NewInst->setFlags(MI.getFlags()); - DstRegs.push_back(DstReg); + // Merge small outputs into MI's output for each def operand. + if (NumLeftovers) { + for (unsigned i = 0; i < NumDefs; ++i) + mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]); + } else { + for (unsigned i = 0; i < NumDefs; ++i) + MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]); } - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - MI.eraseFromParent(); return Legalized; } LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0Reg = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(Src0Reg); - - unsigned NumParts; - LLT NarrowTy0, NarrowTy1; - - if (TypeIdx == 0) { - unsigned NewElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; - unsigned OldElts = DstTy.getNumElements(); - - NarrowTy0 = NarrowTy; - NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); - NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(), - SrcTy.getScalarSizeInBits()) - : SrcTy.getElementType(); - - } else { - unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; - unsigned OldElts = SrcTy.getNumElements(); - - NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : - NarrowTy.getNumElements(); - NarrowTy0 = - LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits()); - NarrowTy1 = NarrowTy; +LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI, + unsigned NumElts) { + unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements(); + + unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs(); + unsigned NumDefs = MI.getNumDefs(); + + SmallVector OutputOpsPieces; + SmallVector OutputRegs; + makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts); + + // Instructions that perform register split will be inserted in basic block + // where register is defined (basic block is in the next operand). + SmallVector, 3> InputOpsPieces(NumInputs / 2); + for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); + UseIdx += 2, ++UseNo) { + MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]); } - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (NarrowTy1.isVector() && - NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) - return UnableToLegalize; - - CmpInst::Predicate Pred - = static_cast(MI.getOperand(1).getPredicate()); - - SmallVector Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); - extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); + // Build PHIs with fewer elements. + unsigned NumLeftovers = OrigNumElts % NumElts ? 
1 : 0; + MIRBuilder.setInsertPt(*MI.getParent(), MI); + for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) { + auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI); + Phi.addDef( + MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI))); + OutputRegs.push_back(Phi.getReg(0)); - for (unsigned I = 0; I < NumParts; ++I) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - DstRegs.push_back(DstReg); - - if (MI.getOpcode() == TargetOpcode::G_ICMP) - MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); - else { - MachineInstr *NewCmp - = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); - NewCmp->setFlags(MI.getFlags()); + for (unsigned j = 0; j < NumInputs / 2; ++j) { + Phi.addUse(InputOpsPieces[j][i]); + Phi.add(MI.getOperand(1 + j * 2 + 1)); } } - if (NarrowTy1.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register CondReg = MI.getOperand(1).getReg(); - - unsigned NumParts = 0; - LLT NarrowTy0, NarrowTy1; - - LLT DstTy = MRI.getType(DstReg); - LLT CondTy = MRI.getType(CondReg); - unsigned Size = DstTy.getSizeInBits(); - - assert(TypeIdx == 0 || CondTy.isVector()); - - if (TypeIdx == 0) { - NarrowTy0 = NarrowTy; - NarrowTy1 = CondTy; - - unsigned NarrowSize = NarrowTy0.getSizeInBits(); - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (Size % NarrowSize != 0) - return UnableToLegalize; - - NumParts = Size / NarrowSize; - - // Need to break down the condition type - if (CondTy.isVector()) { - if (CondTy.getNumElements() == NumParts) - NarrowTy1 = CondTy.getElementType(); - else - NarrowTy1 = - LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts), - CondTy.getScalarSizeInBits()); - } + // Merge small outputs into MI's def. + if (NumLeftovers) { + mergeMixedSubvectors(MI.getReg(0), OutputRegs); } else { - NumParts = CondTy.getNumElements(); - if (NarrowTy.isVector()) { - // TODO: Handle uneven breakdown. - if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) - return UnableToLegalize; - - return UnableToLegalize; - } else { - NarrowTy0 = DstTy.getElementType(); - NarrowTy1 = NarrowTy; - } - } - - SmallVector DstRegs, Src0Regs, Src1Regs, Src2Regs; - if (CondTy.isVector()) - extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); - - extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); - extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); - - for (unsigned i = 0; i < NumParts; ++i) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? 
Src0Regs[i] : CondReg, - Src1Regs[i], Src2Regs[i]); - DstRegs.push_back(DstReg); - } - - if (NarrowTy0.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - const Register DstReg = MI.getOperand(0).getReg(); - LLT PhiTy = MRI.getType(DstReg); - LLT LeftoverTy; - - // All of the operands need to have the same number of elements, so if we can - // determine a type breakdown for the result type, we can for all of the - // source types. - int NumParts, NumLeftover; - std::tie(NumParts, NumLeftover) - = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy); - if (NumParts < 0) - return UnableToLegalize; - - SmallVector DstRegs, LeftoverDstRegs; - SmallVector NewInsts; - - const int TotalNumParts = NumParts + NumLeftover; - - // Insert the new phis in the result block first. - for (int I = 0; I != TotalNumParts; ++I) { - LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; - Register PartDstReg = MRI.createGenericVirtualRegister(Ty); - NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI) - .addDef(PartDstReg)); - if (I < NumParts) - DstRegs.push_back(PartDstReg); - else - LeftoverDstRegs.push_back(PartDstReg); - } - - MachineBasicBlock *MBB = MI.getParent(); - MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); - insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs); - - SmallVector PartRegs, LeftoverRegs; - - // Insert code to extract the incoming values in each predecessor block. - for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { - PartRegs.clear(); - LeftoverRegs.clear(); - - Register SrcReg = MI.getOperand(I).getReg(); - MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); - MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); - - LLT Unused; - if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, - LeftoverRegs)) - return UnableToLegalize; - - // Add the newly created operand splits to the existing instructions. The - // odd-sized pieces are ordered after the requested NarrowTyArg sized - // pieces. - for (int J = 0; J != TotalNumParts; ++J) { - MachineInstrBuilder MIB = NewInsts[J]; - MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]); - MIB.addMBB(&OpMBB); - } + MIRBuilder.buildMerge(MI.getReg(0), OutputRegs); } MI.eraseFromParent(); @@ -3927,27 +3842,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - if (TypeIdx != 1) - return UnableToLegalize; - const int NumDst = MI.getNumOperands() - 1; const Register SrcReg = MI.getOperand(NumDst).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(SrcReg); - // TODO: Create sequence of extracts. - if (DstTy == NarrowTy) + if (TypeIdx != 1 || NarrowTy == DstTy) return UnableToLegalize; - LLT GCDTy = getGCDType(SrcTy, NarrowTy); - if (DstTy == GCDTy) { - // This would just be a copy of the same unmerge. - // TODO: Create extracts, pad with undef and create intermediate merges. + // Requires compatible types. Otherwise SrcReg should have been defined by + // merge-like instruction that would get artifact combined. Most likely + // instruction that defines SrcReg has to perform more/fewer elements + // legalization compatible with NarrowTy. 
+ assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types"); + assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + + if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) || + (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0)) return UnableToLegalize; - } - auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + // This is most likely DstTy (smaller then register size) packed in SrcTy + // (larger then register size) and since unmerge was not combined it will be + // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy + // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy. + + // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy) + // + // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence + // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg + // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy) + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg); const int NumUnmerge = Unmerge->getNumOperands() - 1; const int PartsPerUnmerge = NumDst / NumUnmerge; @@ -3963,90 +3887,88 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register Result = MI.getOperand(0).getReg(); - Register Overflow = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); - - LLT SrcTy = MRI.getType(LHS); - if (!SrcTy.isVector()) - return UnableToLegalize; - - LLT ElementType = SrcTy.getElementType(); - LLT OverflowElementTy = MRI.getType(Overflow).getElementType(); - const ElementCount NumResult = SrcTy.getElementCount(); - LLT GCDTy = getGCDType(SrcTy, NarrowTy); - - // Unmerge the operands to smaller parts of GCD type. - auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS); - auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS); - - const int NumOps = UnmergeLHS->getNumOperands() - 1; - const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps); - LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy); - LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType); - - // Perform the operation over unmerged parts. - SmallVector ResultParts; - SmallVector OverflowParts; - for (int I = 0; I != NumOps; ++I) { - Register Operand1 = UnmergeLHS->getOperand(I).getReg(); - Register Operand2 = UnmergeRHS->getOperand(I).getReg(); - auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy}, - {Operand1, Operand2}); - ResultParts.push_back(PartMul->getOperand(0).getReg()); - OverflowParts.push_back(PartMul->getOperand(1).getReg()); - } - - LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts); - LLT OverflowLCMTy = - LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy); - - // Recombine the pieces to the original result and overflow registers. - buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts); - buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts); - MI.eraseFromParent(); - return Legalized; -} - -// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces -// a vector -// -// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with -// undef as necessary. 
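// --------------------------------------------------------------------------
// A minimal standalone sketch of the two-step unmerge described above: a SrcTy
// value is first split into register-sized NarrowTy pieces, and each piece is
// then split into the requested DstTy values, preserving the overall order.
// Plain integers stand in for registers and bit widths play the role of LLTs;
// unmergeTwoStep is an illustrative name only.
#include <cstdint>
#include <vector>

static std::vector<uint64_t> unmergeTwoStep(uint64_t Src, unsigned SrcBits,
                                            unsigned NarrowBits,
                                            unsigned DstBits) {
  std::vector<uint64_t> Dsts;
  if (SrcBits % NarrowBits != 0 || NarrowBits % DstBits != 0)
    return Dsts; // same compatibility requirement as the patch

  uint64_t NarrowMask =
      (NarrowBits == 64) ? ~0ull : ((1ull << NarrowBits) - 1);
  uint64_t DstMask = (DstBits == 64) ? ~0ull : ((1ull << DstBits) - 1);
  for (unsigned NOff = 0; NOff < SrcBits; NOff += NarrowBits) {
    uint64_t Piece = (Src >> NOff) & NarrowMask; // unmerge to NarrowTy piece
    for (unsigned DOff = 0; DOff < NarrowBits; DOff += DstBits)
      Dsts.push_back((Piece >> DOff) & DstMask); // unmerge piece to DstTy
  }
  return Dsts;
}

// E.g. unmergeTwoStep(Src, 64, 32, 8) yields the same eight bytes, in the same
// order, as extracting them from Src directly; the intermediate 32-bit pieces
// model the register-sized values the artifact combiner can reuse.
// --------------------------------------------------------------------------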
-// -// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 -// -> <2 x s16> -// -// %4:_(s16) = G_IMPLICIT_DEF -// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 -// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 -// %7:_(<2 x s16>) = G_IMPLICIT_DEF -// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 -// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + // Requires compatible types. Otherwise user of DstReg did not perform unmerge + // that should have been artifact combined. Most likely instruction that uses + // DstReg has to do more/fewer elements legalization compatible with NarrowTy. + assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types"); + assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + if (NarrowTy == SrcTy) + return UnableToLegalize; - // Break into a common type - SmallVector Parts; - for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) - extractGCDType(Parts, GCDTy, MO.getReg()); + // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use + // is for old mir tests. Since the changes to more/fewer elements it should no + // longer be possible to generate MIR like this when starting from llvm-ir + // because LCMTy approach was replaced with merge/unmerge to vector elements. + if (TypeIdx == 1) { + assert(SrcTy.isVector() && "Expected vector types"); + assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) || + (NarrowTy.getNumElements() >= SrcTy.getNumElements())) + return UnableToLegalize; + // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy) + // + // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy) + // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy) + // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4 + // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6 + // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8 + // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11 + + SmallVector Elts; + LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType(); + for (unsigned i = 1; i < MI.getNumOperands(); ++i) { + auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg()); + for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j) + Elts.push_back(Unmerge.getReg(j)); + } - // Build the requested new merge, padding with undef. - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, - TargetOpcode::G_ANYEXT); + SmallVector NarrowTyElts; + unsigned NumNarrowTyElts = NarrowTy.getNumElements(); + unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts; + for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces; + ++i, Offset += NumNarrowTyElts) { + ArrayRef Pieces(&Elts[Offset], NumNarrowTyElts); + NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + } - // Pack into the original result register. 
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + MIRBuilder.buildMerge(DstReg, NarrowTyElts); + MI.eraseFromParent(); + return Legalized; + } + + assert(TypeIdx == 0 && "Bad type index"); + if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) || + (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0)) + return UnableToLegalize; + // This is most likely SrcTy (smaller then register size) packed in DstTy + // (larger then register size) and since merge was not combined it will be + // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy + // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy. + + // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4 + // + // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg + // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4 + // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence + SmallVector NarrowTyElts; + unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); + unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1; + unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts; + for (unsigned i = 0; i < NumParts; ++i) { + SmallVector Sources; + for (unsigned j = 0; j < NumElts; ++j) + Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg()); + NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0)); + } + + MIRBuilder.buildMerge(DstReg, NarrowTyElts); MI.eraseFromParent(); return Legalized; } @@ -4217,164 +4139,15 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, - LLT NarrowTy) { - assert(TypeIdx == 0 && "only one type index expected"); - - const unsigned Opc = MI.getOpcode(); - const int NumDefOps = MI.getNumExplicitDefs(); - const int NumSrcOps = MI.getNumOperands() - NumDefOps; - const unsigned Flags = MI.getFlags(); - const unsigned NarrowSize = NarrowTy.getSizeInBits(); - const LLT NarrowScalarTy = LLT::scalar(NarrowSize); - - assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 " - "result and 1-3 sources or 2 results and " - "1-2 sources"); - - SmallVector DstRegs; - for (int I = 0; I < NumDefOps; ++I) - DstRegs.push_back(MI.getOperand(I).getReg()); - - // First of all check whether we are narrowing (changing the element type) - // or reducing the vector elements - const LLT DstTy = MRI.getType(DstRegs[0]); - const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType(); - - SmallVector ExtractedRegs[3]; - SmallVector Parts; - - // Break down all the sources into NarrowTy pieces we can operate on. This may - // involve creating merges to a wider type, padded with undef. - for (int I = 0; I != NumSrcOps; ++I) { - Register SrcReg = MI.getOperand(I + NumDefOps).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - // The type to narrow SrcReg to. For narrowing, this is a smaller scalar. - // For fewerElements, this is a smaller vector with the same element type. - LLT OpNarrowTy; - if (IsNarrow) { - OpNarrowTy = NarrowScalarTy; - - // In case of narrowing, we need to cast vectors to scalars for this to - // work properly - // FIXME: Can we do without the bitcast here if we're narrowing? - if (SrcTy.isVector()) { - SrcTy = LLT::scalar(SrcTy.getSizeInBits()); - SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); - } - } else { - auto NarrowEC = NarrowTy.isVector() ? 
NarrowTy.getElementCount() - : ElementCount::getFixed(1); - OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType()); - } - - LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); - - // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. - buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I], - TargetOpcode::G_ANYEXT); - } - - SmallVector ResultRegs[2]; - - // Input operands for each sub-instruction. - SmallVector InputRegs(NumSrcOps, Register()); - - int NumParts = ExtractedRegs[0].size(); - const unsigned DstSize = DstTy.getSizeInBits(); - const LLT DstScalarTy = LLT::scalar(DstSize); - - // Narrowing needs to use scalar types - LLT DstLCMTy, NarrowDstTy; - if (IsNarrow) { - DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy); - NarrowDstTy = NarrowScalarTy; - } else { - DstLCMTy = getLCMType(DstTy, NarrowTy); - NarrowDstTy = NarrowTy; - } - - // We widened the source registers to satisfy merge/unmerge size - // constraints. We'll have some extra fully undef parts. - const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; - - for (int I = 0; I != NumRealParts; ++I) { - // Emit this instruction on each of the split pieces. - for (int J = 0; J != NumSrcOps; ++J) - InputRegs[J] = ExtractedRegs[J][I]; - - MachineInstrBuilder Inst; - if (NumDefOps == 1) - Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); - else - Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs, - Flags); - - for (int J = 0; J != NumDefOps; ++J) - ResultRegs[J].push_back(Inst.getReg(J)); - } - - // Fill out the widened result with undef instead of creating instructions - // with undef inputs. - int NumUndefParts = NumParts - NumRealParts; - if (NumUndefParts != 0) { - Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0); - for (int I = 0; I != NumDefOps; ++I) - ResultRegs[I].append(NumUndefParts, Undef); - } - - // Extract the possibly padded result. Use a scratch register if we need to do - // a final bitcast, otherwise use the original result register. - Register MergeDstReg; - for (int I = 0; I != NumDefOps; ++I) { - if (IsNarrow && DstTy.isVector()) - MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); - else - MergeDstReg = DstRegs[I]; - - buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]); - - // Recast to vector if we narrowed a vector - if (IsNarrow && DstTy.isVector()) - MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - int64_t Imm = MI.getOperand(2).getImm(); - - LLT DstTy = MRI.getType(DstReg); - - SmallVector Parts; - LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts); - - for (Register &R : Parts) - R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0); - - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - - MI.eraseFromParent(); - return Legalized; -} - LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; + GenericMachineInstr &GMI = cast(MI); + unsigned NumElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; switch (MI.getOpcode()) { case G_IMPLICIT_DEF: - return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); case G_TRUNC: case G_AND: case G_OR: @@ -4439,10 +4212,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SSUBSAT: case G_UADDSAT: case G_USUBSAT: - return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_UMULO: case G_SMULO: - return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: @@ -4454,7 +4225,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_CTTZ_ZERO_UNDEF: case G_CTPOP: case G_FCOPYSIGN: - return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: case G_ANYEXT: @@ -4467,14 +4237,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTTOPTR: case G_PTRTOINT: case G_ADDRSPACE_CAST: - return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: - return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/}); case G_SELECT: - return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); + if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + return fewerElementsVectorMultiEltType(GMI, NumElts); + return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/}); case G_PHI: - return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + return fewerElementsVectorPhi(GMI, NumElts); case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: @@ -4491,7 +4263,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_STORE: return reduceLoadStoreWidth(cast(MI), TypeIdx, NarrowTy); case G_SEXT_INREG: - return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/}); GISEL_VECREDUCE_CASES_NONSEQ return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy); case G_SHUFFLE_VECTOR: @@ -5053,6 +4825,15 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: @@ -5070,6 +4851,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FMA: + case TargetOpcode::G_FSHR: + case TargetOpcode::G_FSHL: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorSrc(MI, MoreTy, 3); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_EXTRACT: if (TypeIdx != 1) return UnableToLegalize; @@ -5079,6 +4871,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, return Legalized; case TargetOpcode::G_INSERT: case TargetOpcode::G_FREEZE: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FABS: + case TargetOpcode::G_BSWAP: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_SEXT_INREG: if (TypeIdx != 0) return UnableToLegalize; 
Observer.changingInstr(MI); @@ -5098,30 +4895,34 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UNMERGE_VALUES: { - if (TypeIdx != 1) - return UnableToLegalize; - - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - int NumDst = MI.getNumOperands() - 1; - moreElementsVectorSrc(MI, MoreTy, NumDst); - - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - for (int I = 0; I != NumDst; ++I) - MIB.addDef(MI.getOperand(I).getReg()); + case TargetOpcode::G_UNMERGE_VALUES: + return UnableToLegalize; + case TargetOpcode::G_PHI: + return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); + case TargetOpcode::G_BUILD_VECTOR: { + SmallVector Elts; + for (auto Op : MI.uses()) { + Elts.push_back(Op.getReg()); + } - int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); - for (int I = NumDst; I != NewNumDst; ++I) - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) { + Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType())); + } - MIB.addUse(MI.getOperand(NumDst).getReg()); + MIRBuilder.buildDeleteTrailingVectorElements( + MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts)); MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_PHI: - return moreElementsVectorPhi(MI, TypeIdx, MoreTy); - case TargetOpcode::G_SHUFFLE_VECTOR: - return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); + case TargetOpcode::G_TRUNC: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } default: return UnableToLegalize; } @@ -6778,6 +6579,24 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { LLT VecTy = MRI.getType(SrcVec); LLT EltTy = VecTy.getElementType(); + unsigned NumElts = VecTy.getNumElements(); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) { + SmallVector SrcRegs; + extractParts(SrcVec, EltTy, NumElts, SrcRegs); + + if (InsertVal) { + SrcRegs[IdxVal] = MI.getOperand(2).getReg(); + MIRBuilder.buildMerge(DstReg, SrcRegs); + } else { + MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]); + } + + MI.eraseFromParent(); + return Legalized; + } + if (!EltTy.isByteSized()) { // Not implemented. LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); return UnableToLegalize; @@ -6796,7 +6615,6 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { // if the index is out of bounds. Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); - int64_t IdxVal; if (mi_match(Idx, MRI, m_ICst(IdxVal))) { int64_t Offset = IdxVal * EltBytes; PtrInfo = PtrInfo.getWithOffset(Offset); @@ -6923,6 +6741,32 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Src); + // Extract sub-vector or one element + if (SrcTy.isVector()) { + unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); + unsigned DstSize = DstTy.getSizeInBits(); + + if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) && + (Offset + DstSize <= SrcTy.getSizeInBits())) { + // Unmerge and allow access to each Src element for the artifact combiner. 
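// --------------------------------------------------------------------------
// A minimal standalone sketch of the constant-index fast path added above for
// G_INSERT/EXTRACT_VECTOR_ELT: for an in-range known index, pull the vector
// apart into scalar elements, then either read or overwrite the element at
// that index and rebuild. std::vector<int> stands in for the vector register;
// the function names are illustrative.
#include <cstddef>
#include <vector>

static int extractElementAt(const std::vector<int> &SrcVec, size_t IdxVal) {
  // extractParts(SrcVec, EltTy, NumElts, SrcRegs) followed by buildCopy.
  return SrcVec[IdxVal];
}

static std::vector<int> insertElementAt(std::vector<int> SrcVec, size_t IdxVal,
                                        int InsertVal) {
  // SrcRegs[IdxVal] = InsertVal followed by buildMerge into the destination.
  SrcVec[IdxVal] = InsertVal;
  return SrcVec;
}

// With a non-constant index the lowering still falls back to the stack
// temporary and getVectorElementPointer path further down.
// --------------------------------------------------------------------------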
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src); + + // Take element(s) we need to extract and copy it (merge them). + SmallVector SubVectorElts; + for (unsigned Idx = Offset / SrcEltSize; + Idx < (Offset + DstSize) / SrcEltSize; ++Idx) { + SubVectorElts.push_back(Unmerge.getReg(Idx)); + } + if (SubVectorElts.size() == 1) + MIRBuilder.buildCopy(Dst, SubVectorElts[0]); + else + MIRBuilder.buildMerge(Dst, SubVectorElts); + + MI.eraseFromParent(); + return Legalized; + } + } + if (DstTy.isScalar() && (SrcTy.isScalar() || (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { @@ -6956,6 +6800,45 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { LLT DstTy = MRI.getType(Src); LLT InsertTy = MRI.getType(InsertSrc); + // Insert sub-vector or one element + if (DstTy.isVector() && !InsertTy.isPointer()) { + LLT EltTy = DstTy.getElementType(); + unsigned EltSize = EltTy.getSizeInBits(); + unsigned InsertSize = InsertTy.getSizeInBits(); + + if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) && + (Offset + InsertSize <= DstTy.getSizeInBits())) { + auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src); + SmallVector DstElts; + unsigned Idx = 0; + // Elements from Src before insert start Offset + for (; Idx < Offset / EltSize; ++Idx) { + DstElts.push_back(UnmergeSrc.getReg(Idx)); + } + + // Replace elements in Src with elements from InsertSrc + if (InsertTy.getSizeInBits() > EltSize) { + auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc); + for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize; + ++Idx, ++i) { + DstElts.push_back(UnmergeInsertSrc.getReg(i)); + } + } else { + DstElts.push_back(InsertSrc); + ++Idx; + } + + // Remaining elements from Src after insert + for (; Idx < DstTy.getNumElements(); ++Idx) { + DstElts.push_back(UnmergeSrc.getReg(Idx)); + } + + MIRBuilder.buildMerge(Dst, DstElts); + MI.eraseFromParent(); + return Legalized; + } + } + if (InsertTy.isVector() || (DstTy.isVector() && DstTy.getElementType() != InsertTy)) return UnableToLegalize; diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index fb5ed35c1f72..391251886fbb 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, return buildPtrMask(Res, Op0, MaskReg); } +MachineInstrBuilder +MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res, + const SrcOp &Op0) { + LLT ResTy = Res.getLLTTy(*getMRI()); + LLT Op0Ty = Op0.getLLTTy(*getMRI()); + + assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && + "Op0 has more elements"); + + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + SmallVector Regs; + for (auto Op : Unmerge.getInstr()->defs()) + Regs.push_back(Op.getReg()); + Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0); + unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size(); + for (unsigned i = 0; i < NumberOfPadElts; ++i) + Regs.push_back(Undef); + return buildMerge(Res, Regs); +} + +MachineInstrBuilder +MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res, + const SrcOp &Op0) { + LLT ResTy = Res.getLLTTy(*getMRI()); + LLT Op0Ty = Op0.getLLTTy(*getMRI()); + + assert((ResTy.isVector() && 
Op0Ty.isVector()) && "Non vector type"); + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() < Op0Ty.getNumElements()) && + "Op0 has fewer elements"); + + SmallVector Regs; + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + for (unsigned i = 0; i < ResTy.getNumElements(); ++i) + Regs.push_back(Unmerge.getReg(i)); + return buildMerge(Res, Regs); +} + MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } @@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef Res, MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) { unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits(); - SmallVector TmpVec; - for (unsigned I = 0; I != NumReg; ++I) - TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res)); - return buildUnmerge(TmpVec, Op); + SmallVector TmpVec(NumReg, Res); + return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef Res, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 00ee2988d17f..4981a537dc7c 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { return LLT::scalar(LCMSize); } +LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) { + if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy || + (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits())) + return getLCMType(OrigTy, TargetTy); + + unsigned OrigTyNumElts = OrigTy.getNumElements(); + unsigned TargetTyNumElts = TargetTy.getNumElements(); + if (OrigTyNumElts % TargetTyNumElts == 0) + return OrigTy; + + unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts); + return LLT::scalarOrVector(ElementCount::getFixed(NumElts), + OrigTy.getElementType()); +} + LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { const unsigned OrigSize = OrigTy.getSizeInBits(); const unsigned TargetSize = TargetTy.getSizeInBits(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8055eb9a82d6..067ad819e0d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18395,8 +18395,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (StoreSDNode *ST1 = dyn_cast(Chain)) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { - if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && - ST->getMemoryVT() == ST1->getMemoryVT() && + if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr && + ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the // same location, then the store is dead/noop. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e2fe8bf54eb..2ae0d4df7b77 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2500,6 +2500,7 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, /// they are simply ignored. 
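// A standalone sketch of the element-count arithmetic behind the new
// getCoverTy helper and the buildPadVectorWithUndefElements /
// buildDeleteTrailingVectorElements builders shown above: when both types are
// vectors with the same scalar type, the original element count is kept if it
// is already a multiple of the target count and rounded up otherwise, and a
// value is then padded with undef lanes or has trailing lanes dropped to
// reach that count. Plain C++ only (std::nullopt models an undef lane); this
// is not the MachineIRBuilder API.
#include <cstdint>
#include <optional>
#include <vector>

uint64_t coverElementCount(uint64_t OrigElts, uint64_t TargetElts) {
  if (OrigElts % TargetElts == 0)
    return OrigElts;                                          // already covered
  return ((OrigElts + TargetElts - 1) / TargetElts) * TargetElts; // round up
}

std::vector<std::optional<int>> resizeLanes(std::vector<std::optional<int>> V,
                                            uint64_t NewCount) {
  V.resize(NewCount, std::nullopt); // pad with undef or drop trailing lanes
  return V;
}
// For example: coverElementCount(3, 4) == 4, coverElementCount(6, 4) == 8,
// coverElementCount(8, 4) == 8.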
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth) const { + unsigned Opcode = V.getOpcode(); EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); @@ -2511,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, // Deal with some common cases here that work for both fixed and scalable // vector types. - switch (V.getOpcode()) { + switch (Opcode) { case ISD::SPLAT_VECTOR: UndefElts = V.getOperand(0).isUndef() ? APInt::getAllOnes(DemandedElts.getBitWidth()) @@ -2537,7 +2538,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1); - } + default: + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) + return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth); + break; +} // We don't support other cases than those above for scalable vectors at // the moment. @@ -2548,7 +2554,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); UndefElts = APInt::getZero(NumElts); - switch (V.getOpcode()) { + switch (Opcode) { case ISD::BUILD_VECTOR: { SDValue Scl; for (unsigned i = 0; i != NumElts; ++i) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3d29e3f39a8e..e6b06ab93d6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3136,6 +3136,19 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, return false; } +bool TargetLowering::isSplatValueForTargetNode(SDValue Op, + const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use isSplatValue if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. @@ -6644,9 +6657,10 @@ SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, EVT ShVT = Op1.getValueType(); SDValue Zero = DAG.getConstant(0, DL, ShVT); - // If a rotate in the other direction is supported, use it. + // If a rotate in the other direction is more supported, use it. unsigned RevRot = IsLeft ? 
ISD::ROTR : ISD::ROTL; - if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { + if (!isOperationLegalOrCustom(Node->getOpcode(), VT) && + isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); return DAG.getNode(RevRot, DL, VT, Op0, Sub); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 906a48d81840..5421b2d59a1b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -89,7 +89,6 @@ static void dumpLocationList(raw_ostream &OS, const DWARFFormValue &FormValue, U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(), MRI, Ctx.getDWARFObj(), U, DumpOpts, Indent); - return; } static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, @@ -105,7 +104,6 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, Ctx.isLittleEndian(), 0); DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format) .print(OS, DumpOpts, MRI, U); - return; } static DWARFDie resolveReferencedType(DWARFDie D, diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 9b7ffed2ca67..eed0a60ec75e 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -607,7 +607,7 @@ bool DWARFUnit::parseDWO() { DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase); if (getVersion() == 4) { auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0); + DWO->setRangesSection(RangeSection, DWORangesBase.getValueOr(0)); } return true; diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 0cefbd63a7ae..262fcb063366 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -70,6 +70,88 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#Number . const char *decodeNumber(const char *Mangled, unsigned long *Ret); + /// Extract the hex-digit from a given string. + /// + /// \param Mangled string to extract the hex-digit. + /// \param Ret assigned result value. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#HexDigits . + const char *decodeHexdigit(const char *Mangled, char *Ret); + + /// Extract the back reference position from a given string. + /// + /// \param Mangled string to extract the back reference position. + /// \param Ret assigned result value. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \note a result <= 0 is a failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + /// \see https://dlang.org/spec/abi.html#NumberBackRef . + const char *decodeBackrefPos(const char *Mangled, long *Ret); + + /// Extract the symbol pointed by the back reference form a given string. + /// + /// \param Mangled string to extract the back reference position. + /// \param Ret assigned result value. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + const char *decodeBackref(const char *Mangled, const char **Ret); + + /// Extract and demangle backreferenced symbol from a given mangled symbol + /// and append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. 
+ /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . + const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle backreferenced type from a given mangled symbol + /// and append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// \param IsFunction whether the backreferenced type is expected to be a + /// function. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + /// \see https://dlang.org/spec/abi.html#TypeBackRef . + const char *parseTypeBackref(OutputBuffer *Demangled, const char *Mangled, + bool IsFunction); + + /// Extract and demangle calling convention from a given mangled symbol and + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#CallConvention . + /// \see https://dlang.org/spec/abi.html#function_calling_conventions . + const char *parseCallConvention(OutputBuffer *Demangled, const char *Mangled); + + /// Check whether it is a function calling convention. + /// + /// \param Mangled string to extract the function calling convention. + /// + /// \return True on success, false otherwise. + /// + /// \see https://dlang.org/spec/abi.html#CallConvention . + /// \see https://dlang.org/spec/abi.html#function_calling_conventions . + bool isCallConvention(const char *Mangled); + /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. @@ -109,14 +191,226 @@ struct Demangler { /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. + /// \param SuffixModifiers True if we are printing the modifiers after the + /// symbol, false otherwise. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#QualifiedName . - const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); + const char *parseQualified(OutputBuffer *Demangled, const char *Mangled, + bool SuffixModifiers); + + /// Extract and demangle the D function attributes from a given mangled + /// symbol append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#FuncAttr . + const char *parseAttributes(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the function type from a given mangled symbol + /// without the return type and append it to the arguments, calling + /// convention and attribute output strings, respectively. + /// + /// \param Args output buffer to write the demangled arguments. + /// \param Call output buffer to write the demangled calling convention. + /// \param Attr output buffer to write the demangled attributes. + /// \param Mangled mangled symbol to be demangled. 
+ /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \note Any of the output strings can be nullptr to throw the information + /// away. + /// + /// \see https://dlang.org/spec/abi.html#TypeFunctionNoReturn . + const char *parseFunctionTypeNoreturn(OutputBuffer *Args, OutputBuffer *Call, + OutputBuffer *Attr, + const char *Mangled); + + /// Extract and demangle the function type from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#TypeFunction . + const char *parseFunctionType(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the function arguments list from a given mangled + /// symbol append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Parameters . + const char *parseFunctionArgs(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle a type from a given mangled symbol append it to + /// the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Type . + const char *parseType(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the type modifiers from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#TypeModifiers . + const char *parseTypeModifiers(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle a tuple value from a given mangled symbol append it + /// to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#TypeTuple . 
+ const char *parseTuple(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the template symbol parameter from a given mangled + /// symbol append it to the output string + /// + /// \param Demangled output buffer to write the demangled name + /// \param Mangled mangled symbol to be demangled + /// + /// \return the remaining string on success or nullptr on failure + /// + /// \see https://dlang.org/spec/abi.html#TemplateArgX + const char *parseTemplateSymbolParameter(OutputBuffer *Demangled, + const char *Mangled); + + /// Extract and demangle the template arguments list from a given mangled + /// symbol append it to the output string + /// + /// \param Demangled output buffer to write the demangled name + /// \param Mangled mangled symbol to be demangled + /// + /// \return the remaining string on success or nullptr on failure + /// + /// \see https://dlang.org/spec/abi.html#TemplateArgs + const char *parseTemplateArgs(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle a template from a given mangled symbol and append it + /// to the output string + /// + /// \param Demangled output buffer to write the demangled name + /// \param Mangled mangled symbol to be demangled + /// \param Len expected characters length (default to -1 if unknown length) + /// + /// \return the remaining string on success or nullptr on failure + /// + /// \see https://dlang.org/spec/abi.html#TemplateInstanceName + const char *parseTemplate(OutputBuffer *Demangled, const char *Mangled, + unsigned long Len = -1); + + /// Extract and demangle an integer value from a given mangled symbol append + /// it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// \param Type mangled type character in which the type should be + /// represented as. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseInteger(OutputBuffer *Demangled, const char *Mangled, + char Type); + + /// Extract and demangle an struct literal value from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// \param Name demangled symbol name of the struct literal. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseStructLiteral(OutputBuffer *Demangled, const char *Mangled, + const char *Name); + + /// Extract and demangle an array literal value from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseArrayLiteral(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle an associative array value from a given mangled + /// symbol append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . 
+ const char *parseAssocArray(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle a string value from a given mangled symbol append it + /// to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseString(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle a floating-point value from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseReal(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle any value from a given mangled symbol append it to + /// the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// \param Name demangled symbol name of the type, if needed. + /// \param Type mangled type character in which the type should be + /// represented as, if needed. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseValue(OutputBuffer *Demangled, const char *Mangled, + const char *Name, char Type); /// The string we are demangling. const char *Str; + /// The index of the last back reference. + int LastBackref; }; } // namespace @@ -146,126 +440,940 @@ const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { return Mangled; } -bool Demangler::isSymbolName(const char *Mangled) { - if (std::isdigit(*Mangled)) - return true; +const char *Demangler::decodeHexdigit(const char *Mangled, char *Ret) { + char C; + + // Return nullptr if trying to extract something that isn't a hexdigit + if (Mangled == nullptr || !std::isxdigit(Mangled[0]) || + !std::isxdigit(Mangled[1])) + return nullptr; + + C = Mangled[0]; + if (!std::isdigit(C)) + *Ret = C - (std::isupper(C) ? 'A' : 'a') + 10; + else + *Ret = C - '0'; - // TODO: Handle symbol back references and template instances. - return false; + C = Mangled[1]; + if (!std::isdigit(C)) + *Ret = (*Ret << 4) | (C - (std::isupper(C) ? 'A' : 'a') + 10); + else + *Ret = (*Ret << 4) | (C - '0'); + + Mangled += 2; + + return Mangled; } -const char *Demangler::parseMangle(OutputBuffer *Demangled, - const char *Mangled) { - // A D mangled symbol is comprised of both scope and type information. - // MangleName: - // _D QualifiedName Type - // _D QualifiedName Z +const char *Demangler::decodeBackrefPos(const char *Mangled, long *Ret) { + // Return nullptr if trying to extract something that isn't a digit + if (Mangled == nullptr || !std::isalpha(*Mangled)) + return nullptr; + + // Any identifier or non-basic type that has been emitted to the mangled + // symbol before will not be emitted again, but is referenced by a special + // sequence encoding the relative position of the original occurrence in the + // mangled symbol name. + // Numbers in back references are encoded with base 26 by upper case letters + // A-Z for higher digits but lower case letters a-z for the last digit. 
+ // NumberBackRef: + // [a-z] + // [A-Z] NumberBackRef // ^ - // The caller should have guaranteed that the start pointer is at the - // above location. - // Note that type is never a function type, but only the return type of - // a function or the type of a variable. - Mangled += 2; + unsigned long Val = 0; - Mangled = parseQualified(Demangled, Mangled); + while (std::isalpha(*Mangled)) { + // Check for overflow + if (Val > (std::numeric_limits::max() - 25) / 26) + break; - if (Mangled != nullptr) { - // Artificial symbols end with 'Z' and have no type. - if (*Mangled == 'Z') - ++Mangled; - else { - // TODO: Implement symbols with types. - return nullptr; + Val *= 26; + + if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { + Val += Mangled[0] - 'a'; + if ((long)Val <= 0) + break; + *Ret = Val; + return Mangled + 1; } + + Val += Mangled[0] - 'A'; + ++Mangled; } - return Mangled; + return nullptr; } -const char *Demangler::parseQualified(OutputBuffer *Demangled, - const char *Mangled) { - // Qualified names are identifiers separated by their encoded length. - // Nested functions also encode their argument types without specifying - // what they return. - // QualifiedName: - // SymbolFunctionName - // SymbolFunctionName QualifiedName - // ^ - // SymbolFunctionName: - // SymbolName - // SymbolName TypeFunctionNoReturn - // SymbolName M TypeFunctionNoReturn - // SymbolName M TypeModifiers TypeFunctionNoReturn - // The start pointer should be at the above location. +const char *Demangler::decodeBackref(const char *Mangled, const char **Ret) { + *Ret = nullptr; - // Whether it has more than one symbol - size_t NotFirst = false; - do { - // Skip over anonymous symbols. - if (*Mangled == '0') { - do - ++Mangled; - while (*Mangled == '0'); + if (Mangled == nullptr || *Mangled != 'Q') + return nullptr; - continue; - } + // Position of 'Q' + const char *Qpos = Mangled; + long RefPos; + ++Mangled; - if (NotFirst) - *Demangled << '.'; - NotFirst = true; + Mangled = decodeBackrefPos(Mangled, &RefPos); + if (Mangled == nullptr) + return nullptr; - Mangled = parseIdentifier(Demangled, Mangled); + if (RefPos > Qpos - Str) + return nullptr; - } while (Mangled && isSymbolName(Mangled)); + // Set the position of the back reference. + *Ret = Qpos - RefPos; return Mangled; } -const char *Demangler::parseIdentifier(OutputBuffer *Demangled, - const char *Mangled) { +const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled, + const char *Mangled) { + // An identifier back reference always points to a digit 0 to 9. + // IdentifierBackRef: + // Q NumberBackRef + // ^ + const char *Backref; unsigned long Len; - if (Mangled == nullptr || *Mangled == '\0') + // Get position of the back reference + Mangled = decodeBackref(Mangled, &Backref); + + // Must point to a simple identifier + Backref = decodeNumber(Backref, &Len); + if (Backref == nullptr || strlen(Backref) < Len) return nullptr; - // TODO: Parse back references and lengthless template instances. + Backref = parseLName(Demangled, Backref, Len); + if (Backref == nullptr) + return nullptr; - const char *Endptr = decodeNumber(Mangled, &Len); + return Mangled; +} - if (Endptr == nullptr || Len == 0) - return nullptr; +const char *Demangler::parseTypeBackref(OutputBuffer *Demangled, + const char *Mangled, bool IsFunction) { + // A type back reference always points to a letter. 
+ // TypeBackRef: + // Q NumberBackRef + // ^ + const char *Backref; - if (strlen(Endptr) < Len) + // If we appear to be moving backwards through the mangle string, then + // bail as this may be a recursive back reference. + if (Mangled - Str >= LastBackref) return nullptr; - Mangled = Endptr; + int SaveRefPos = LastBackref; + LastBackref = Mangled - Str; - // TODO: Parse template instances with a length prefix. + // Get position of the back reference. + Mangled = decodeBackref(Mangled, &Backref); - // There can be multiple different declarations in the same function that - // have the same mangled name. To make the mangled names unique, a fake - // parent in the form `__Sddd' is added to the symbol. - if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') { - const char *NumPtr = Mangled + 3; - while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr)) - ++NumPtr; + // Must point to a type. + if (IsFunction) + Backref = parseFunctionType(Demangled, Backref); + else + Backref = parseType(Demangled, Backref); - if (Mangled + Len == NumPtr) { - // Skip over the fake parent. - Mangled += Len; - return parseIdentifier(Demangled, Mangled); - } + LastBackref = SaveRefPos; - // Else demangle it as a plain identifier. + if (Backref == nullptr) + return nullptr; + + return Mangled; +} + +bool Demangler::isCallConvention(const char *Mangled) { + switch (*Mangled) { + case 'F': + case 'U': + case 'V': + case 'W': + case 'R': + case 'Y': + return true; + + default: + return false; } +} - return parseLName(Demangled, Mangled, Len); +bool Demangler::isSymbolName(const char *Mangled) { + long Ret; + const char *Qref = Mangled; + + if (std::isdigit(*Mangled)) + return true; + + if (Mangled[0] == '_' && Mangled[1] == '_' && + (Mangled[2] == 'T' || Mangled[2] == 'U')) + return true; + + if (*Mangled != 'Q') + return false; + + Mangled = decodeBackrefPos(Mangled + 1, &Ret); + if (Mangled == nullptr || Ret > Qref - Str) + return false; + + return std::isdigit(Qref[-Ret]); } -const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, - unsigned long Len) { +const char *Demangler::parseCallConvention(OutputBuffer *Demangled, + const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + case 'F': // D + ++Mangled; + break; + + case 'U': // C + ++Mangled; + *Demangled << "extern(C) "; + break; + + case 'W': // Windows + ++Mangled; + *Demangled << "extern(Windows) "; + break; + + case 'V': // Pascal + ++Mangled; + *Demangled << "extern(Pascal) "; + break; + + case 'R': // C++ + ++Mangled; + *Demangled << "extern(C++) "; + break; + + case 'Y': // Objective-C + ++Mangled; + *Demangled << "extern(Objective-C) "; + break; + + default: + return nullptr; + } + + return Mangled; +} + +const char *Demangler::parseAttributes(OutputBuffer *Demangled, + const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + while (*Mangled == 'N') { + ++Mangled; + switch (*Mangled) { + case 'a': // pure + ++Mangled; + *Demangled << "pure "; + continue; + + case 'b': // nothrow + ++Mangled; + *Demangled << "nothrow "; + continue; + + case 'c': // ref + ++Mangled; + *Demangled << "ref "; + continue; + + case 'd': // @property + ++Mangled; + *Demangled << "@property "; + continue; + + case 'e': // @trusted + ++Mangled; + *Demangled << "@trusted "; + continue; + + case 'f': // @safe + ++Mangled; + *Demangled << "@safe "; + continue; + + case 'g': + case 'h': + case 'k': + case 'n': + // inout parameter is represented 
as 'Ng'. + // vector parameter is represented as 'Nh'. + // return parameter is represented as 'Nk'. + // typeof(*null) parameter is represented as 'Nn'. + // If we see this, then we know we're really in the + // parameter list. Rewind and break. + Mangled--; + break; + + case 'i': // @nogc + ++Mangled; + *Demangled << "@nogc "; + continue; + + case 'j': // return + ++Mangled; + *Demangled << "return "; + continue; + + case 'l': // scope + ++Mangled; + *Demangled << "scope "; + continue; + + case 'm': // @live + ++Mangled; + *Demangled << "@live "; + continue; + + default: // unknown attribute + return nullptr; + } + break; + } + + return Mangled; +} + +const char *Demangler::parseFunctionTypeNoreturn(OutputBuffer *Args, + OutputBuffer *Call, + OutputBuffer *Attr, + const char *Mangled) { + OutputBuffer Dump; + if (!initializeOutputBuffer(nullptr, nullptr, Dump, 32)) + return nullptr; + + // Skip over calling convention and attributes + Mangled = parseCallConvention(Call ? Call : &Dump, Mangled); + Mangled = parseAttributes(Attr ? Attr : &Dump, Mangled); + + if (Args) + *Args << '('; + + Mangled = parseFunctionArgs(Args ? Args : &Dump, Mangled); + if (Args) + *Args << ')'; + + std::free(Dump.getBuffer()); + return Mangled; +} + +const char *Demangler::parseFunctionType(OutputBuffer *Demangled, + const char *Mangled) { + OutputBuffer Attr, Args, Type; + + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + // The order of the mangled string is: + // CallConvention FuncAttrs Arguments ArgClose Type + // The demangled string is re-ordered as: + // CallConvention Type Arguments FuncAttrs + if (!initializeOutputBuffer(nullptr, nullptr, Attr, 32) || + !initializeOutputBuffer(nullptr, nullptr, Args, 32) || + !initializeOutputBuffer(nullptr, nullptr, Type, 32)) + return nullptr; + + Mangled = parseFunctionTypeNoreturn(&Args, Demangled, &Attr, Mangled); + + // Function return type + Mangled = parseType(&Type, Mangled); + + // Append to decl in order + *Demangled << StringView(Type.getBuffer(), Type.getCurrentPosition()); + *Demangled << StringView(Args.getBuffer(), Args.getCurrentPosition()); + *Demangled << ' '; + *Demangled << StringView(Attr.getBuffer(), Attr.getCurrentPosition()); + + std::free(Attr.getBuffer()); + std::free(Args.getBuffer()); + std::free(Type.getBuffer()); + return Mangled; +} + +const char *Demangler::parseMangle(OutputBuffer *Demangled, + const char *Mangled) { + // A D mangled symbol is comprised of both scope and type information. + // MangleName: + // _D QualifiedName Type + // _D QualifiedName Z + // ^ + // The caller should have guaranteed that the start pointer is at the + // above location. + // Note that type is never a function type, but only the return type of + // a function or the type of a variable. + Mangled += 2; + + Mangled = parseQualified(Demangled, Mangled, true); + + if (Mangled != nullptr) { + // Artificial symbols end with 'Z' and have no type. + if (*Mangled == 'Z') + ++Mangled; + else { + // Discard the declaration or return type. + OutputBuffer Type; + if (!initializeOutputBuffer(nullptr, nullptr, Type, 32)) + return nullptr; + + Mangled = parseType(&Type, Mangled); + std::free(Type.getBuffer()); + } + } + + return Mangled; +} + +const char *Demangler::parseQualified(OutputBuffer *Demangled, + const char *Mangled, + bool SuffixModifiers) { + // Qualified names are identifiers separated by their encoded length. + // Nested functions also encode their argument types without specifying + // what they return. 
+ // QualifiedName: + // SymbolFunctionName + // SymbolFunctionName QualifiedName + // ^ + // SymbolFunctionName: + // SymbolName + // SymbolName TypeFunctionNoReturn + // SymbolName M TypeFunctionNoReturn + // SymbolName M TypeModifiers TypeFunctionNoReturn + // The start pointer should be at the above location. + + // Whether it has more than one symbol + size_t NotFirst = false; + do { + // Skip over anonymous symbols. + if (*Mangled == '0') { + do + ++Mangled; + while (*Mangled == '0'); + + continue; + } + + if (NotFirst) + *Demangled << '.'; + NotFirst = true; + + Mangled = parseIdentifier(Demangled, Mangled); + + if (Mangled && (*Mangled == 'M' || isCallConvention(Mangled))) { + const char *Start = Mangled; + int Saved = Demangled->getCurrentPosition(); + + // Save the type modifiers for appending at the end if needed. + OutputBuffer Mods; + if (!initializeOutputBuffer(nullptr, nullptr, Mods, 32)) + return nullptr; + + // Skip over 'this' parameter and type modifiers. + if (*Mangled == 'M') { + ++Mangled; + Mangled = parseTypeModifiers(&Mods, Mangled); + Demangled->setCurrentPosition(Saved); + } + + Mangled = parseFunctionTypeNoreturn(Demangled, nullptr, nullptr, Mangled); + if (SuffixModifiers) + *Demangled << StringView(Mods.getBuffer(), Mods.getCurrentPosition()); + + if (Mangled == nullptr || *Mangled == '\0') { + // Did not match the rule we were looking for. + Mangled = Start; + Demangled->setCurrentPosition(Saved); + } + + std::free(Mods.getBuffer()); + } + } while (Mangled && isSymbolName(Mangled)); + + return Mangled; +} + +const char *Demangler::parseTypeModifiers(OutputBuffer *Demangled, + const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + case 'x': // const + ++Mangled; + *Demangled << " const"; + return Mangled; + + case 'y': // immutable + ++Mangled; + *Demangled << " immutable"; + return Mangled; + + case 'O': // shared + ++Mangled; + *Demangled << " shared"; + return parseTypeModifiers(Demangled, Mangled); + + case 'N': + ++Mangled; + if (*Mangled == 'g') // wild + { + ++Mangled; + *Demangled << " inout"; + return parseTypeModifiers(Demangled, Mangled); + } + + return nullptr; + + default: + return Mangled; + } +} + +const char *Demangler::parseIdentifier(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Len; + + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + if (*Mangled == 'Q') + return parseSymbolBackref(Demangled, Mangled); + + // May be a template instance without a length prefix. + if (Mangled[0] == '_' && Mangled[1] == '_' && + (Mangled[2] == 'T' || Mangled[2] == 'U')) + return parseTemplate(Demangled, Mangled); + + const char *Endptr = decodeNumber(Mangled, &Len); + + if (Endptr == nullptr || Len == 0) + return nullptr; + + if (strlen(Endptr) < Len) + return nullptr; + + Mangled = Endptr; + + // May be a template instance with a length prefix. + if (Len >= 5 && Mangled[0] == '_' && Mangled[1] == '_' && + (Mangled[2] == 'T' || Mangled[2] == 'U')) + return parseTemplate(Demangled, Mangled, Len); + + // There can be multiple different declarations in the same function that + // have the same mangled name. To make the mangled names unique, a fake + // parent in the form `__Sddd' is added to the symbol. + if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') { + const char *NumPtr = Mangled + 3; + while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr)) + ++NumPtr; + + if (Mangled + Len == NumPtr) { + // Skip over the fake parent. 
+ Mangled += Len; + return parseIdentifier(Demangled, Mangled); + } + + // Else demangle it as a plain identifier. + } + + return parseLName(Demangled, Mangled, Len); +} + +const char *Demangler::parseFunctionArgs(OutputBuffer *Demangled, + const char *Mangled) { + size_t N = 0; + + while (Mangled && *Mangled != '\0') { + switch (*Mangled) { + case 'X': // (variadic T t...) style + ++Mangled; + *Demangled << "..."; + return Mangled; + case 'Y': // (variadic T t, ...) style + ++Mangled; + *Demangled << ", ..."; + return Mangled; + case 'Z': // Normal function + ++Mangled; + return Mangled; + } + + if (N++) + *Demangled << ", "; + + if (*Mangled == 'M') // scope(T) + { + ++Mangled; + *Demangled << "scope "; + } + + if (Mangled[0] == 'N' && Mangled[1] == 'k') // return(T) + { + Mangled += 2; + *Demangled << "return "; + } + + switch (*Mangled) { + case 'I': // in(T) + ++Mangled; + *Demangled << "in "; + if (*Mangled == 'K') // in ref(T) + { + ++Mangled; + *Demangled << "ref "; + } + break; + case 'J': // out(T) + ++Mangled; + *Demangled << "out "; + break; + case 'K': // ref(T) + ++Mangled; + *Demangled << "ref "; + break; + case 'L': // lazy(T) + ++Mangled; + *Demangled << "lazy "; + break; + } + Mangled = parseType(Demangled, Mangled); + } + + return Mangled; +} + +const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + case 'O': // shared(T) + ++Mangled; + *Demangled << "shared("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'x': // const(T) + ++Mangled; + *Demangled << "const("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'y': // immutable(T) + ++Mangled; + *Demangled << "immutable("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'N': + ++Mangled; + + switch (*Mangled) { + case 'g': // wild(T) + ++Mangled; + *Demangled << "inout("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'h': // vector(T) + ++Mangled; + *Demangled << "__vector("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'n': // typeof(*null) + ++Mangled; + *Demangled << "typeof(*null)"; + return Mangled; + } + + // invalid. 
+ return nullptr; + + case 'A': // dynamic array (T[]) + ++Mangled; + Mangled = parseType(Demangled, Mangled); + *Demangled << "[]"; + return Mangled; + + case 'G': // static array (T[N]) + { + const char *NumPtr; + size_t Num = 0; + ++Mangled; + + NumPtr = Mangled; + while (std::isdigit(*Mangled)) { + ++Num; + ++Mangled; + } + Mangled = parseType(Demangled, Mangled); + *Demangled << '['; + *Demangled << StringView(NumPtr, Num); + *Demangled << ']'; + return Mangled; + } + + case 'H': // associative array (T[T]) + { + OutputBuffer Type; + if (!initializeOutputBuffer(nullptr, nullptr, Type, 32)) + return nullptr; + + size_t Sztype; + ++Mangled; + + Mangled = parseType(&Type, Mangled); + Sztype = Type.getCurrentPosition(); + + Mangled = parseType(Demangled, Mangled); + *Demangled << '['; + *Demangled << StringView(Type.getBuffer(), Sztype); + *Demangled << ']'; + + std::free(Type.getBuffer()); + return Mangled; + } + + case 'P': // pointer (T*) + ++Mangled; + if (!isCallConvention(Mangled)) { + Mangled = parseType(Demangled, Mangled); + *Demangled << '*'; + return Mangled; + } + + [[clang::fallthrough]]; + case 'F': // function T (D) + case 'U': // function T (C) + case 'W': // function T (Windows) + case 'V': // function T (Pascal) + case 'R': // function T (C++) + case 'Y': // function T (Objective-C) + // Function pointer types don't include the trailing asterisk. + Mangled = parseFunctionType(Demangled, Mangled); + *Demangled << "function"; + return Mangled; + + case 'C': // class T + case 'S': // struct T + case 'E': // enum T + case 'T': // typedef T + ++Mangled; + return parseQualified(Demangled, Mangled, false); + + case 'D': // delegate T + { + OutputBuffer Mods; + if (!initializeOutputBuffer(nullptr, nullptr, Mods, 32)) + return nullptr; + + size_t Szmods; + ++Mangled; + + Mangled = parseTypeModifiers(&Mods, Mangled); + Szmods = Mods.getCurrentPosition(); + + // Back referenced function type. + if (Mangled && *Mangled == 'Q') + Mangled = parseTypeBackref(Demangled, Mangled, true); + else + Mangled = parseFunctionType(Demangled, Mangled); + + *Demangled << "delegate"; + *Demangled << StringView(Mods.getBuffer(), Szmods); + + std::free(Mods.getBuffer()); + return Mangled; + } + + case 'B': // tuple T + ++Mangled; + return parseTuple(Demangled, Mangled); + + // Basic types. + case 'n': + ++Mangled; + *Demangled << "typeof(null)"; + return Mangled; + + case 'v': + ++Mangled; + *Demangled << "void"; + return Mangled; + + case 'g': + ++Mangled; + *Demangled << "byte"; + return Mangled; + case 'h': + ++Mangled; + *Demangled << "ubyte"; + return Mangled; + + case 's': + ++Mangled; + *Demangled << "short"; + return Mangled; + case 't': + ++Mangled; + *Demangled << "ushort"; + return Mangled; + + case 'i': + ++Mangled; + *Demangled << "int"; + return Mangled; + case 'k': + ++Mangled; + *Demangled << "uint"; + return Mangled; + + case 'l': + ++Mangled; + *Demangled << "long"; + return Mangled; + case 'm': + ++Mangled; + *Demangled << "ulong"; + return Mangled; + + case 'f': + ++Mangled; + *Demangled << "float"; + return Mangled; + case 'd': + ++Mangled; + *Demangled << "double"; + return Mangled; + case 'e': + ++Mangled; + *Demangled << "real"; + return Mangled; + + // Imaginary types. + case 'o': + ++Mangled; + *Demangled << "ifloat"; + return Mangled; + case 'p': + ++Mangled; + *Demangled << "idouble"; + return Mangled; + case 'j': + ++Mangled; + *Demangled << "ireal"; + return Mangled; + + // Complex types. 
+ case 'q': + ++Mangled; + *Demangled << "cfloat"; + return Mangled; + case 'r': + ++Mangled; + *Demangled << "cdouble"; + return Mangled; + case 'c': + ++Mangled; + *Demangled << "creal"; + return Mangled; + + // Other types. + case 'b': + ++Mangled; + *Demangled << "bool"; + return Mangled; + + case 'a': + ++Mangled; + *Demangled << "char"; + return Mangled; + case 'u': + ++Mangled; + *Demangled << "wchar"; + return Mangled; + case 'w': + ++Mangled; + *Demangled << "dchar"; + return Mangled; + + case 'z': + ++Mangled; + + switch (*Mangled) { + case 'i': + ++Mangled; + *Demangled << "cent"; + return Mangled; + case 'k': + ++Mangled; + *Demangled << "ucent"; + return Mangled; + } + return nullptr; + + // Back referenced type. + case 'Q': + return parseTypeBackref(Demangled, Mangled, false); + + default: // unhandled. + return nullptr; + } +} + +const char *Demangler::parseTuple(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Elements; + + Mangled = decodeNumber(Mangled, &Elements); + if (Mangled == nullptr) + return nullptr; + + *Demangled << "tuple("; + + while (Elements--) { + Mangled = parseType(Demangled, Mangled); + if (Mangled == nullptr) + return nullptr; + + if (Elements != 0) + *Demangled << ", "; + } + + *Demangled << ')'; + return Mangled; +} + +const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, + unsigned long Len) { switch (Len) { case 6: + if (strncmp(Mangled, "__ctor", Len) == 0) { + // Constructor symbol for a class/struct. + *Demangled << "this"; + Mangled += Len; + return Mangled; + } + if (strncmp(Mangled, "__dtor", Len) == 0) { + // Destructor symbol for a class/struct. + *Demangled << "~this"; + Mangled += Len; + return Mangled; + } if (strncmp(Mangled, "__initZ", Len + 1) == 0) { // The static initializer for a given symbol. Demangled->prepend("initializer for "); @@ -292,6 +1400,15 @@ const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, } break; + case 10: + if (strncmp(Mangled, "__postblitMFZ", Len + 3) == 0) { + // Postblit symbol for a struct. + *Demangled << "this(this)"; + Mangled += Len + 3; + return Mangled; + } + break; + case 11: if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) { // The interface symbol for a given class. @@ -319,7 +1436,568 @@ const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, return Mangled; } -Demangler::Demangler(const char *Mangled) : Str(Mangled) {} +const char *Demangler::parseTemplateSymbolParameter(OutputBuffer *Demangled, + const char *Mangled) { + if (strncmp(Mangled, "_D", 2) == 0 && isSymbolName(Mangled + 2)) + return parseMangle(Demangled, Mangled); + + if (*Mangled == 'Q') + return parseQualified(Demangled, Mangled, false); + + unsigned long Len; + const char *EndPtr = decodeNumber(Mangled, &Len); + + if (EndPtr == nullptr || Len == 0) + return nullptr; + + // In template parameter symbols generated by the frontend up to 2.076, + // the symbol length is encoded and the first character of the mangled + // name can be a digit. This causes ambiguity issues because the digits + // of the two numbers are adjacent. + long PtrSize = Len; + const char *PtrEnd; + int Saved = Demangled->getCurrentPosition(); + + // Work backwards until a match is found. + for (PtrEnd = EndPtr; EndPtr != nullptr; PtrEnd--) { + Mangled = PtrEnd; + + // Reached the beginning of the pointer to the name length, + // try parsing the entire symbol. 
+ if (PtrSize == 0) { + PtrSize = Len; + PtrEnd = EndPtr; + EndPtr = nullptr; + } + + // Check whether template parameter is a function with a valid + // return type or an untyped identifier. + if (isSymbolName(Mangled)) + Mangled = parseQualified(Demangled, Mangled, false); + else if (strncmp(Mangled, "_D", 2) == 0 && isSymbolName(Mangled + 2)) + Mangled = parseMangle(Demangled, Mangled); + + // Check for name length mismatch. + if (Mangled && (EndPtr == nullptr || (Mangled - PtrEnd) == PtrSize)) + return Mangled; + + PtrSize /= 10; + Demangled->setCurrentPosition(Saved); + } + + // No match on any combinations. + return nullptr; +} + +const char *Demangler::parseTemplateArgs(OutputBuffer *Demangled, + const char *Mangled) { + size_t N = 0; + + while (Mangled && *Mangled != '\0') { + switch (*Mangled) { + case 'Z': // End of parameter list. + ++Mangled; + return Mangled; + } + + if (N++) + *Demangled << ", "; + + // Skip over specialised template prefix. + if (*Mangled == 'H') + ++Mangled; + + switch (*Mangled) { + case 'S': // Symbol parameter. + ++Mangled; + Mangled = parseTemplateSymbolParameter(Demangled, Mangled); + break; + case 'T': // Type parameter. + ++Mangled; + Mangled = parseType(Demangled, Mangled); + break; + case 'V': // Value parameter. + { + OutputBuffer Name; + char Type; + + // Peek at the type. + ++Mangled; + Type = *Mangled; + + if (Type == 'Q') { + // Value type is a back reference, peek at the real type. + const char *Backref; + if (decodeBackref(Mangled, &Backref) == nullptr) + return nullptr; + + Type = *Backref; + } + + // In the few instances where the type is actually desired in + // the output, it should precede the value from dlang_value. + Name = OutputBuffer(); + if (!initializeOutputBuffer(nullptr, nullptr, Name, 32)) + return nullptr; + + Mangled = parseType(&Name, Mangled); + Name << '\0'; + Name.setCurrentPosition(Name.getCurrentPosition() - 1); + + Mangled = parseValue(Demangled, Mangled, Name.getBuffer(), Type); + std::free(Name.getBuffer()); + break; + } + + case 'X': // Externally mangled parameter. + { + unsigned long Len; + const char *EndPtr; + + ++Mangled; + EndPtr = decodeNumber(Mangled, &Len); + if (EndPtr == nullptr || strlen(EndPtr) < Len) + return nullptr; + + *Demangled << StringView(EndPtr, Len); + Mangled = EndPtr + Len; + break; + } + default: + return nullptr; + } + } + + return Mangled; +} + +const char *Demangler::parseTemplate(OutputBuffer *Demangled, + const char *Mangled, unsigned long Len) { + const char *Start = Mangled; + OutputBuffer Args; + + // Template instance names have the types and values of its parameters + // encoded into it. + // TemplateInstanceName: + // Number __T LName TemplateArgs Z + // Number __U LName TemplateArgs Z + // ^ + // The start pointer should be at the above location, and LEN should be + // the value of the decoded number. + + // Template symbol. + if (!isSymbolName(Mangled + 3) || Mangled[3] == '0') + return nullptr; + + Mangled += 3; + + // Template identifier. + Mangled = parseIdentifier(Demangled, Mangled); + + if (!initializeOutputBuffer(nullptr, nullptr, Args, 32)) + return nullptr; + + // Template arguments. + Mangled = parseTemplateArgs(&Args, Mangled); + + *Demangled << "!("; + *Demangled << StringView(Args.getBuffer(), Args.getCurrentPosition()); + *Demangled << ')'; + + std::free(Args.getBuffer()); + + // Check for template name length mismatch. 
+ if (Len != -1UL && Mangled && (unsigned long)(Mangled - Start) != Len) + return nullptr; + + return Mangled; +} + +const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, + const char *Name, char Type) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + // Null value. + case 'n': + ++Mangled; + *Demangled << "null"; + break; + + // Integral values. + case 'N': + ++Mangled; + *Demangled << '-'; + Mangled = parseInteger(Demangled, Mangled, Type); + break; + + case 'i': + ++Mangled; + + [[clang::fallthrough]]; + // There really should always be an `i' before encoded numbers, but there + // wasn't in early versions of D2, so this case range must remain for + // backwards compatibility. + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + Mangled = parseInteger(Demangled, Mangled, Type); + break; + + // Real value. + case 'e': + ++Mangled; + Mangled = parseReal(Demangled, Mangled); + break; + + // Complex value. + case 'c': + ++Mangled; + Mangled = parseReal(Demangled, Mangled); + *Demangled << '+'; + if (Mangled == nullptr || *Mangled != 'c') + return nullptr; + ++Mangled; + Mangled = parseReal(Demangled, Mangled); + *Demangled << 'i'; + break; + + // String values. + case 'a': // UTF8 + case 'w': // UTF16 + case 'd': // UTF32 + Mangled = parseString(Demangled, Mangled); + break; + + // Array values. + case 'A': + ++Mangled; + if (Type == 'H') + Mangled = parseAssocArray(Demangled, Mangled); + else + Mangled = parseArrayLiteral(Demangled, Mangled); + break; + + // Struct values. + case 'S': + ++Mangled; + Mangled = parseStructLiteral(Demangled, Mangled, Name); + break; + + // Function literal symbol. + case 'f': + ++Mangled; + if (strncmp(Mangled, "_D", 2) != 0 || !isSymbolName(Mangled + 2)) + return nullptr; + Mangled = parseMangle(Demangled, Mangled); + break; + + default: + return nullptr; + } + + return Mangled; +} + +const char *Demangler::parseStructLiteral(OutputBuffer *Demangled, + const char *Mangled, + const char *Name) { + unsigned long Args; + + Mangled = decodeNumber(Mangled, &Args); + if (Mangled == nullptr) + return nullptr; + + if (Name != nullptr) + *Demangled << Name; + + *Demangled << '('; + while (Args--) { + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); + if (Mangled == nullptr) + return nullptr; + + if (Args != 0) + *Demangled << ", "; + } + + *Demangled << ')'; + return Mangled; +} + +const char *Demangler::parseString(OutputBuffer *Demangled, + const char *Mangled) { + char Type = *Mangled; + unsigned long Len; + + ++Mangled; + Mangled = decodeNumber(Mangled, &Len); + if (Mangled == nullptr || *Mangled != '_') + return nullptr; + + ++Mangled; + *Demangled << '\"'; + while (Len--) { + char Val; + const char *Endptr = decodeHexdigit(Mangled, &Val); + + if (Endptr == nullptr) + return nullptr; + + // Sanitize white and non-printable characters. 
+ switch (Val) { + case ' ': + *Demangled << ' '; + break; + case '\t': + *Demangled << "\\t"; + break; + case '\n': + *Demangled << "\\n"; + break; + case '\r': + *Demangled << "\\r"; + break; + case '\f': + *Demangled << "\\f"; + break; + case '\v': + *Demangled << "\\v"; + break; + + default: + if (std::isprint(Val)) + *Demangled << Val; + else { + *Demangled << "\\x"; + *Demangled << StringView(Mangled, 2); + } + } + + Mangled = Endptr; + } + *Demangled << '\"'; + + if (Type != 'a') + *Demangled << Type; + + return Mangled; +} + +const char *Demangler::parseInteger(OutputBuffer *Demangled, + const char *Mangled, char Type) { + if (Type == 'a' || Type == 'u' || Type == 'w') { + // Parse character value + char Value[20]; + int Pos = sizeof(Value); + int Width = 0; + unsigned long Val; + + Mangled = decodeNumber(Mangled, &Val); + if (Mangled == nullptr) + return nullptr; + + *Demangled << '\''; + + if (Type == 'a' && Val >= 0x20 && Val < 0x7F) { + // Represent as a character literal + *Demangled << static_cast(Val); + } else { + // Represent as a hexadecimal value + switch (Type) { + case 'a': // char + *Demangled << "\\x"; + Width = 2; + break; + case 'u': // wchar + *Demangled << "\\u"; + Width = 4; + break; + case 'w': // dchar + *Demangled << "\\U"; + Width = 8; + break; + } + + while (Val > 0) { + int Digit = Val % 16; + + if (Digit < 10) + Value[--Pos] = (char)(Digit + '0'); + else + Value[--Pos] = (char)((Digit - 10) + 'a'); + + Val /= 16; + Width--; + } + + for (; Width > 0; Width--) + Value[--Pos] = '0'; + + *Demangled << StringView(&(Value[Pos]), sizeof(Value) - Pos); + } + *Demangled << '\''; + } else if (Type == 'b') { + // Parse boolean value + unsigned long Val; + + Mangled = decodeNumber(Mangled, &Val); + if (Mangled == nullptr) + return nullptr; + + *Demangled << (Val ? "true" : "false"); + } else { + // Parse integer value + const char *NumPtr = Mangled; + size_t Num = 0; + + if (!std::isdigit(*Mangled)) + return nullptr; + + while (std::isdigit(*Mangled)) { + ++Num; + ++Mangled; + } + *Demangled << StringView(NumPtr, Num); + + // Append suffix + switch (Type) { + case 'h': // ubyte + case 't': // ushort + case 'k': // uint + *Demangled << 'u'; + break; + case 'l': // long + *Demangled << 'L'; + break; + case 'm': // ulong + *Demangled << "uL"; + break; + } + } + + return Mangled; +} + +const char *Demangler::parseReal(OutputBuffer *Demangled, const char *Mangled) { + // Handle NAN and +-INF. + if (strncmp(Mangled, "NAN", 3) == 0) { + *Demangled << "NaN"; + Mangled += 3; + return Mangled; + } + + if (strncmp(Mangled, "INF", 3) == 0) { + *Demangled << "Inf"; + Mangled += 3; + return Mangled; + } + + if (strncmp(Mangled, "NINF", 4) == 0) { + *Demangled << "-Inf"; + Mangled += 4; + return Mangled; + } + + // Hexadecimal prefix and leading bit. + if (*Mangled == 'N') { + *Demangled << '-'; + ++Mangled; + } + + if (!std::isxdigit(*Mangled)) + return nullptr; + + *Demangled << "0x"; + *Demangled << StringView(Mangled, 1); + *Demangled << '.'; + ++Mangled; + + // Significand. + while (std::isxdigit(*Mangled)) { + *Demangled << StringView(Mangled, 1); + ++Mangled; + } + + // Exponent. 
+ if (*Mangled != 'P') + return nullptr; + + *Demangled << 'p'; + ++Mangled; + + if (*Mangled == 'N') { + *Demangled << '-'; + ++Mangled; + } + + while (std::isdigit(*Mangled)) { + *Demangled << StringView(Mangled, 1); + ++Mangled; + } + + return Mangled; +} + +const char *Demangler::parseArrayLiteral(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Elements; + + Mangled = decodeNumber(Mangled, &Elements); + if (Mangled == nullptr) + return nullptr; + + *Demangled << '['; + while (Elements--) { + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); + if (Mangled == nullptr) + return nullptr; + + if (Elements != 0) + *Demangled << ", "; + } + + *Demangled << ']'; + return Mangled; +} + +const char *Demangler::parseAssocArray(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Elements; + + Mangled = decodeNumber(Mangled, &Elements); + if (Mangled == nullptr) + return nullptr; + + *Demangled << '['; + while (Elements--) { + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); + if (Mangled == nullptr) + return nullptr; + + *Demangled << ':'; + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); + if (Mangled == nullptr) + return nullptr; + + if (Elements != 0) + *Demangled << ", "; + } + + *Demangled << ']'; + return Mangled; +} + +Demangler::Demangler(const char *Mangled) + : Str(Mangled), LastBackref(strlen(Mangled)) {} const char *Demangler::parseMangle(OutputBuffer *Demangled) { return parseMangle(Demangled, this->Str); diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index ae48d0333d67..8668fe82601c 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1801,46 +1801,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } else if (isa(C1) && isa(C2)) { const APFloat &C1V = cast(C1)->getValueAPF(); const APFloat &C2V = cast(C2)->getValueAPF(); - APFloat::cmpResult R = C1V.compare(C2V); - switch (pred) { - default: llvm_unreachable("Invalid FCmp Predicate"); - case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy); - case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy); - case FCmpInst::FCMP_UNO: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered); - case FCmpInst::FCMP_ORD: - return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered); - case FCmpInst::FCMP_UEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_OEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpEqual); - case FCmpInst::FCMP_UNE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual); - case FCmpInst::FCMP_ONE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan); - case FCmpInst::FCMP_OLT: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan); - case FCmpInst::FCMP_UGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OLE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_UGE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan); - case FCmpInst::FCMP_OGE: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || - 
R==APFloat::cmpEqual); - } + CmpInst::Predicate Predicate = CmpInst::Predicate(pred); + return ConstantInt::get(ResultTy, FCmpInst::compare(C1V, C2V, Predicate)); } else if (auto *C1VTy = dyn_cast(C1->getType())) { // Fast path for splatted constants. diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index ce349f66c916..7798af3b19b9 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2324,7 +2324,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef Mask, } Src1Elts.setBit(i); Src1Identity &= (M == (i + NumSrcElts)); - continue; } assert((Src0Elts | Src1Elts | UndefElts).isAllOnes() && "unknown shuffle elements"); @@ -4161,6 +4160,47 @@ bool ICmpInst::compare(const APInt &LHS, const APInt &RHS, }; } +bool FCmpInst::compare(const APFloat &LHS, const APFloat &RHS, + FCmpInst::Predicate Pred) { + APFloat::cmpResult R = LHS.compare(RHS); + switch (Pred) { + default: + llvm_unreachable("Invalid FCmp Predicate"); + case FCmpInst::FCMP_FALSE: + return false; + case FCmpInst::FCMP_TRUE: + return true; + case FCmpInst::FCMP_UNO: + return R == APFloat::cmpUnordered; + case FCmpInst::FCMP_ORD: + return R != APFloat::cmpUnordered; + case FCmpInst::FCMP_UEQ: + return R == APFloat::cmpUnordered || R == APFloat::cmpEqual; + case FCmpInst::FCMP_OEQ: + return R == APFloat::cmpEqual; + case FCmpInst::FCMP_UNE: + return R != APFloat::cmpEqual; + case FCmpInst::FCMP_ONE: + return R == APFloat::cmpLessThan || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULT: + return R == APFloat::cmpUnordered || R == APFloat::cmpLessThan; + case FCmpInst::FCMP_OLT: + return R == APFloat::cmpLessThan; + case FCmpInst::FCMP_UGT: + return R == APFloat::cmpUnordered || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OGT: + return R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULE: + return R != APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OLE: + return R == APFloat::cmpLessThan || R == APFloat::cmpEqual; + case FCmpInst::FCMP_UGE: + return R != APFloat::cmpLessThan; + case FCmpInst::FCMP_OGE: + return R == APFloat::cmpGreaterThan || R == APFloat::cmpEqual; + } +} + CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) { assert(CmpInst::isRelational(pred) && "Call only with non-equality predicates!"); @@ -4407,7 +4447,7 @@ void SwitchInstProfUpdateWrapper::addCase( Weights.getValue()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { Changed = true; - Weights.getValue().push_back(W ? *W : 0); + Weights.getValue().push_back(W.getValueOr(0)); } if (Weights) assert(SI.getNumSuccessors() == Weights->size() && diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 50605f50769d..fb7c423e54e2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -602,17 +602,22 @@ void Verifier::visit(Instruction &I) { InstVisitor::visit(I); } -// Helper to recursively iterate over indirect users. By -// returning false, the callback can ask to stop recursing -// further. +// Helper to iterate over indirect users. By returning false, the callback can ask to stop traversing further. 
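With the predicate switch now centralized in FCmpInst::compare (defined in the Instructions.cpp hunk above), any caller that previously open-coded the APFloat::cmpResult mapping can defer to it. A minimal sketch; the wrapper name is illustrative and not part of the patch:

#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool foldOrderedLessThan(const APFloat &A, const APFloat &B) {
  // FCMP_OLT is true only when both operands are ordered and A < B;
  // an unordered pair (e.g. a NaN operand) yields false.
  return FCmpInst::compare(A, B, FCmpInst::FCMP_OLT);
}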
static void forEachUser(const Value *User, SmallPtrSet &Visited, llvm::function_ref Callback) { if (!Visited.insert(User).second) return; - for (const Value *TheNextUser : User->materialized_users()) - if (Callback(TheNextUser)) - forEachUser(TheNextUser, Visited, Callback); + + SmallVector WorkList; + append_range(WorkList, User->materialized_users()); + while (!WorkList.empty()) { + const Value *Cur = WorkList.pop_back_val(); + if (!Visited.insert(Cur).second) + continue; + if (Callback(Cur)) + append_range(WorkList, Cur->materialized_users()); + } } void Verifier::visitGlobalValue(const GlobalValue &GV) { diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index d18579ad4bfc..16941b1cb727 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -877,8 +877,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, [&](const MCAssembler::VersionInfoType &VersionInfo) { auto EncodeVersion = [](VersionTuple V) -> uint32_t { assert(!V.empty() && "empty version"); - unsigned Update = V.getSubminor() ? *V.getSubminor() : 0; - unsigned Minor = V.getMinor() ? *V.getMinor() : 0; + unsigned Update = V.getSubminor().getValueOr(0); + unsigned Minor = V.getMinor().getValueOr(0); assert(Update < 256 && "unencodable update target version"); assert(Minor < 256 && "unencodable minor target version"); assert(V.getMajor() < 65536 && "unencodable major target version"); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 15a249e6177e..e2e4340f44e9 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -565,7 +565,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // TODO: Use version number when setting target features // Currently LLVM supports only "mafdcbv". 
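The forEachUser rewrite above trades recursion for an explicit worklist so very deep use chains cannot overflow the call stack. A self-contained sketch of the same pattern over plain users() rather than materialized_users(); the helper name is illustrative, not from the patch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static void visitTransitiveUsers(const Value *Root,
                                 function_ref<bool(const Value *)> Callback) {
  SmallPtrSet<const Value *, 32> Visited;
  SmallVector<const Value *, 32> WorkList(Root->user_begin(), Root->user_end());
  while (!WorkList.empty()) {
    const Value *Cur = WorkList.pop_back_val();
    if (!Visited.insert(Cur).second)
      continue;                 // Already reached through another use chain.
    if (Callback(Cur))          // Returning false stops expanding this branch.
      WorkList.append(Cur->user_begin(), Cur->user_end());
  }
}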
StringRef SupportedStandardExtension = "mafdcbv"; - if (SupportedStandardExtension.find(C) == StringRef::npos) + if (!SupportedStandardExtension.contains(C)) return createStringError(errc::invalid_argument, "unsupported standard user-level extension '%c'", C); diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 209f9f7255a5..793663ef97d7 100644 --- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -138,8 +138,8 @@ bool FalkorMarkStridedAccesses::run() { bool MadeChange = false; for (Loop *L : LI) - for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt) - MadeChange |= runOnLoop(**LIt); + for (Loop *LIt : depth_first(L)) + MadeChange |= runOnLoop(*LIt); return MadeChange; } @@ -828,10 +828,10 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { Modified = false; for (MachineLoop *I : LI) - for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) + for (MachineLoop *L : depth_first(I)) // Only process inner-loops if (L->isInnermost()) - runOnLoop(**L, Fn); + runOnLoop(*L, Fn); return Modified; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 727e16069fef..ebccc07edc7a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4378,6 +4378,32 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), (v4i32 VImm8000)))), (SQXTNv4i16 V128:$Vn)>; +// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn) +// with reversed min/max +def : Pat<(v16i8 (concat_vectors + (v8i8 V64:$Vd), + (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), + (v8i16 VImm7F)))))), + (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; +def : Pat<(v16i8 (concat_vectors + (v8i8 V64:$Vd), + (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), + (v8i16 VImm80)))))), + (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; + +// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) +// with reversed min/max +def : Pat<(v8i16 (concat_vectors + (v4i16 V64:$Vd), + (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), + (v4i32 VImm7FFF)))))), + (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; +def : Pat<(v8i16 (concat_vectors + (v4i16 V64:$Vd), + (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), + (v4i32 VImm8000)))))), + (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; + //===----------------------------------------------------------------------===// // Advanced SIMD three vector instructions. 
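The Falkor change above replaces explicit df_begin()/df_end() iterator pairs with the depth_first() range adaptor, which works for any type with a GraphTraits specialization (here llvm::Loop, whose children are its subloops). A small sketch in the same spirit; the counting helper is illustrative only:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

static unsigned countInnermostLoops(Loop *Root) {
  unsigned N = 0;
  for (Loop *L : depth_first(Root)) // Visits Root first, then nested loops.
    if (L->isInnermost())
      ++N;
  return N;
}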
//===----------------------------------------------------------------------===// @@ -6577,6 +6603,34 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; +// RADDHN patterns for when RSHRN shifts by half the size of the vector element +def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), + (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))), + (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), + (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; + +// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element +def : Pat<(v16i8 (concat_vectors + (v8i8 V64:$Vd), + (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))), + (RADDHNv8i16_v16i8 + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, + (v8i16 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v8i16 (concat_vectors + (v4i16 V64:$Vd), + (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))), + (RADDHNv4i32_v8i16 + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, + (v4i32 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v4i32 (concat_vectors + (v2i32 V64:$Vd), + (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))), + (RADDHNv2i64_v4i32 + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, + (v2i64 (MOVIv2d_ns (i32 0))))>; + // SHRN patterns for when a logical right shift was used instead of arithmetic // (the immediate guarantees no sign bits actually end up in the result so it // doesn't matter). diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 301e6f6d6f42..e79ff9b597c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -378,5 +378,4 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI, } MI.eraseFromParent(); - return; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 7fd1aa3044fe..5046daaed977 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -533,7 +533,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16, V2S16}) .minScalar(0, S16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .widenScalarToNextMultipleOf(0, 32) .maxScalar(0, S32) .scalarize(0); @@ -541,7 +541,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) .legalFor({S32, S16, V2S16}) // Clamp modifier .minScalarOrElt(0, S16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .widenScalarToNextPow2(0, 32) .lower(); @@ -712,7 +712,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, } if (ST.hasVOP3PInsts()) - FPOpActions.clampMaxNumElements(0, S16, 2); + FPOpActions.clampMaxNumElementsStrict(0, S16, 2); FPOpActions .scalarize(0) @@ -728,7 +728,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_FNEG, G_FABS}) .legalFor(FPTypesPK16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .clampScalar(0, 
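The RADDHN selection above relies on a rounding shift-right-narrow by half the element width being the same operation as a rounding add-high-narrow with a zero second operand. A scalar sanity check of that identity for the 16-to-8-bit case, assuming the usual rounding constant of half the shift amount (illustrative arithmetic only, not from the patch):

#include <cstdint>

// RSHRN #8 on a 16-bit lane: add the rounding constant, shift, narrow.
constexpr uint8_t rshrn8(uint16_t X) { return uint8_t((X + 0x80u) >> 8); }
// RADDHN on 16-bit lanes: rounded high half of the sum, narrowed.
constexpr uint8_t raddhn16(uint16_t A, uint16_t B) {
  return uint8_t((uint32_t(A) + uint32_t(B) + 0x80u) >> 8);
}
static_assert(rshrn8(0x1280) == raddhn16(0x1280, 0), "same result when B == 0");
static_assert(rshrn8(0xffff) == raddhn16(0xffff, 0), "rounding carry matches too");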
S16, S64); @@ -965,7 +965,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.has16BitInsts()) { getActionDefinitionsBuilder(G_BSWAP) .legalFor({S16, S32, V2S16}) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) // FIXME: Fixing non-power-of-2 before clamp is workaround for // narrowScalar limitation. .widenScalarToNextPow2(0) @@ -1425,6 +1425,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + .lowerIf([=](const LegalityQuery &Query) { + // Sub-vector(or single element) insert and extract. + // TODO: verify immediate offset here since lower only works with + // whole elements. + const LLT BigTy = Query.Types[BigTyIdx]; + return BigTy.isVector(); + }) // FIXME: Multiples of 16 should not be legal. .legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; @@ -1583,7 +1590,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Prefer to reduce vector widths for 16-bit vectors before lowering, to // get more vector shift opportunities, since we'll get those when // expanded. - .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)); + .clampMaxNumElementsStrict(0, S16, 2); } else if (ST.has16BitInsts()) { SextInReg.lowerFor({{S32}, {S64}, {S16}}); } else { @@ -1605,14 +1612,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder(G_FSHR) .legalFor({{S32, S32}}) .lowerFor({{V2S16, V2S16}}) - .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .lower(); if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder(G_FSHL) .lowerFor({{V2S16, V2S16}}) - .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .lower(); } else { @@ -2535,10 +2542,8 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper, } else { // For cases where the widened type isn't a nice register value, unmerge // from a widened register (e.g. <3 x s16> -> <4 x s16>) - B.setInsertPt(B.getMBB(), ++B.getInsertPt()); - WideLoad = Helper.widenWithUnmerge(WideTy, ValReg); - B.setInsertPt(B.getMBB(), MI.getIterator()); - B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0); + WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0); + B.buildDeleteTrailingVectorElements(ValReg, WideLoad); } } @@ -3811,6 +3816,10 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, llvm_unreachable("invalid data type"); } + if (StoreVT == LLT::fixed_vector(3, S16)) { + Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg) + .getReg(0); + } return Reg; } @@ -4653,9 +4662,23 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // Deal with the one annoying legal case. 
const LLT V3S16 = LLT::fixed_vector(3, 16); if (Ty == V3S16) { - padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1); - auto Concat = B.buildConcatVectors(LLT::fixed_vector(6, 16), ResultRegs); - B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat); + if (IsTFE) { + if (ResultRegs.size() == 1) { + NewResultReg = ResultRegs[0]; + } else if (ResultRegs.size() == 2) { + LLT V4S16 = LLT::fixed_vector(4, 16); + NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0); + } else { + return false; + } + } + + if (MRI->getType(DstReg).getNumElements() < + MRI->getType(NewResultReg).getNumElements()) { + B.buildDeleteTrailingVectorElements(DstReg, NewResultReg); + } else { + B.buildPadVectorWithUndefElements(DstReg, NewResultReg); + } return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 2bb77d9de0b2..c60012bcfe2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1162,18 +1162,25 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI, // 96-bit loads are only available for vector loads. We need to split this // into a 64-bit part, and 32 (unless we can widen to a 128-bit load). if (MMO->getAlign() < Align(16)) { + MachineFunction *MF = MI.getParent()->getParent(); + ApplyRegBankMapping ApplyBank(*this, MRI, DstBank); + MachineIRBuilder B(MI, ApplyBank); + LegalizerHelper Helper(*MF, ApplyBank, B); LLT Part64, Part32; std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64); - auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0); - auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8); - - auto Undef = B.buildUndef(LoadTy); - auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0); - B.buildInsert(MI.getOperand(0), Ins0, Load1, 64); + if (Helper.reduceLoadStoreWidth(cast(MI), 0, Part64) != + LegalizerHelper::Legalized) + return false; + return true; } else { LLT WiderTy = widen96To128(LoadTy); auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0); - B.buildExtract(MI.getOperand(0), WideLoad, 0); + if (WiderTy.isScalar()) + B.buildTrunc(MI.getOperand(0), WideLoad); + else { + B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(), + WideLoad); + } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp index 2a2338ac7016..2475b44b42a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -428,7 +428,7 @@ class CollectReachableCallees { // // FIXME: Need to handle bit-casted function pointers. // - SmallVector CGNStack(df_begin(KCGN), df_end(KCGN)); + SmallVector CGNStack(depth_first(KCGN)); SmallPtrSet VisitedCGNodes; while (!CGNStack.empty()) { auto *CGN = CGNStack.pop_back_val(); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 4706c74be721..d4fe74ecb96e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1167,11 +1167,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( if (SpillVGPRToAGPR) { // To track the spill frame indices handled in this pass. 
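The legalizeImageIntrinsic hunk above reconciles a widened result with a narrower destination by either dropping trailing lanes or padding with undef. A minimal sketch of that decision, assuming both registers hold vector types and using the builder helpers exactly as the hunk does (destination first, source second); the function name is illustrative:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static void resizeVectorCopy(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             Register Dst, Register Src) {
  // Both Dst and Src are assumed to carry vector LLTs here.
  if (MRI.getType(Dst).getNumElements() < MRI.getType(Src).getNumElements())
    B.buildDeleteTrailingVectorElements(Dst, Src); // Drop the extra lanes.
  else
    B.buildPadVectorWithUndefElements(Dst, Src);   // Fill missing lanes with undef.
}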
BitVector SpillFIs(MFI.getObjectIndexEnd(), false); + BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); bool SeenDbgInstr = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + int FrameIndex; if (MI.isDebugInstr()) SeenDbgInstr = true; @@ -1191,10 +1193,18 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( SpillFIs.set(FI); continue; } - } + } else if (TII->isStoreToStackSlot(MI, FrameIndex) || + TII->isLoadFromStackSlot(MI, FrameIndex)) + NonVGPRSpillFIs.set(FrameIndex); } } + // Stack slot coloring may assign different objets to the same stack slot. + // If not, then the VGPR to AGPR spill slot is dead. + for (unsigned FI : SpillFIs.set_bits()) + if (!NonVGPRSpillFIs.test(FI)) + FuncInfo->setVGPRToAGPRSpillDead(FI); + for (MachineBasicBlock &MBB : MF) { for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) MBB.addLiveIn(Reg); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ce2c8a02194..1755b93538ce 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2395,8 +2395,6 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); auto *ShAmt = MCConstantExpr::create(32, MCCtx); OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); - - return; } unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2bbce1d8ba93..636337ede000 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2212,18 +2212,18 @@ def : GCNPat < >; def : GCNPat < - (i1 (trunc i32:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) + (i1 (DivergentUnaryFrag i32:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; def : GCNPat < - (i1 (trunc i16:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) + (i1 (DivergentUnaryFrag i16:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; def : GCNPat < - (i1 (trunc i64:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), + (i1 (DivergentUnaryFrag i64:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) >; @@ -2429,20 +2429,36 @@ def : GCNPat < // COPY is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. 
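The SIFrameLowering change above keeps two frame-index bit sets: one for VGPR-to-AGPR spill slots and one for every other stack-slot load or store; a spill slot that never shows up in the second set is dead. A compact sketch of that bookkeeping, with a callback standing in for setVGPRToAGPRSpillDead (names here are illustrative):

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

static void markDeadVGPRToAGPRSpills(const BitVector &SpillFIs,
                                     const BitVector &NonVGPRSpillFIs,
                                     function_ref<void(int)> SetDead) {
  for (unsigned FI : SpillFIs.set_bits())
    if (!NonVGPRSpillFIs.test(FI)) // No other load/store touches this slot.
      SetDead((int)FI);
}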
def : GCNPat < - (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))), + (v2i16 (UniformBinFrag (i16 0), (i16 SReg_32:$src1))), (S_LSHL_B32 SReg_32:$src1, (i16 16)) >; def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))), + (v2i16 (DivergentBinFrag (i16 0), (i16 SReg_32:$src1))), + (v2i16 (V_LSHLREV_B32_e64 (i16 16), SReg_32:$src1)) +>; + + +def : GCNPat < + (v2i16 (UniformBinFrag (i16 SReg_32:$src1), (i16 0))), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1) >; def : GCNPat < - (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))), + (v2i16 (DivergentBinFrag (i16 SReg_32:$src1), (i16 0))), + (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src1)) +>; + +def : GCNPat < + (v2f16 (UniformBinFrag (f16 SReg_32:$src1), (f16 FP_ZERO))), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1) >; +def : GCNPat < + (v2f16 (DivergentBinFrag (f16 SReg_32:$src1), (f16 FP_ZERO))), + (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src1)) +>; + def : GCNPat < (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))), (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) @@ -2459,41 +2475,73 @@ def : GCNPat < >; def : GCNPat < - (v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))), + (v2i16 (UniformBinFrag (i16 undef), (i16 SReg_32:$src1))), (S_LSHL_B32 SReg_32:$src1, (i32 16)) >; def : GCNPat < - (v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))), + (v2i16 (DivergentBinFrag (i16 undef), (i16 SReg_32:$src1))), + (v2i16 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)) +>; + + +def : GCNPat < + (v2f16 (UniformBinFrag (f16 undef), (f16 SReg_32:$src1))), (S_LSHL_B32 SReg_32:$src1, (i32 16)) >; +def : GCNPat < + (v2f16 (DivergentBinFrag (f16 undef), (f16 SReg_32:$src1))), + (v2f16 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)) +>; + let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))), + (v2i16 (UniformBinFrag (i16 SReg_32:$src0), (i16 SReg_32:$src1))), (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; +def : GCNPat < + (v2i16 (DivergentBinFrag (i16 SReg_32:$src0), (i16 SReg_32:$src1))), + (v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0)))) +>; + // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (UniformBinFrag (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), (v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1)) >; +def : GCNPat < + (v2i16 (DivergentBinFrag (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (V_BFI_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src0, SReg_32:$src1)) +>; + def : GCNPat < - (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), + (v2i16 (UniformBinFrag (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), (S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1) >; -// TODO: Should source modifiers be matched to v_pack_b32_f16? 
def : GCNPat < - (v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))), + (v2i16 (DivergentBinFrag (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), + (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (V_AND_OR_B32_e64 SReg_32:$src1, (i32 (V_MOV_B32_e32 (i32 0xffff0000))), (i32 (V_LSHRREV_B32_e64 (i32 16), SReg_32:$src0)))) +>; + +def : GCNPat < + (v2f16 (UniformBinFrag (f16 SReg_32:$src0), (f16 SReg_32:$src1))), (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; +def : GCNPat < + (v2f16 (DivergentBinFrag (f16 SReg_32:$src0), (f16 SReg_32:$src1))), + (v2f16 (V_LSHL_OR_B32_e64 SReg_32:$src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src0)))) +>; + + def : GCNPat < (v2f16 (is_canonicalized (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)), (f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))), diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 6f92ac6dac45..3ce368ef4db9 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -444,7 +444,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { MFI.setStackID(i, TargetStackID::Default); for (auto &R : VGPRToAGPRSpills) { - if (R.second.FullyAllocated) + if (R.second.IsDead) MFI.RemoveStackObject(R.first); } } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index c305bc20e40d..8accbf611c5f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -465,6 +465,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { struct VGPRSpillToAGPR { SmallVector Lanes; bool FullyAllocated = false; + bool IsDead = false; }; // Map WWM VGPR to a stack slot that is used to save/restore it in the @@ -546,6 +547,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { : I->second.Lanes[Lane]; } + void setVGPRToAGPRSpillDead(int FrameIndex) { + auto I = VGPRToAGPRSpills.find(FrameIndex); + if (I != VGPRToAGPRSpills.end()) + I->second.IsDead = true; + } + bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index a3eccf13cd71..a8368892c565 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -794,6 +794,18 @@ class VOPPatGen { list ret = [!con(Outs, (set Ins))]; } +class DivergentUnaryFrag : PatFrag < + (ops node:$src0), + (Op $src0), + [{ return N->isDivergent(); }]> { + // This check is unnecessary as it's captured by the result register + // bank constraint. + // + // FIXME: Should add a way for the emitter to recognize this is a + // trivially true predicate to eliminate the check. + let GISelPredicateCode = [{return true;}]; +} + class VOPPatOrNull { list ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen.ret, []); } diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 798d08393eae..51060018a5ca 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -571,8 +571,6 @@ void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // See lib/CodeGen/RegisterRelaxation.cpp for details. // We end up here when a jump is too long for a RJMP instruction. 
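The divergent build_vector patterns above assemble a v2i16 from a masked low half and a shifted high half (v_and_b32 plus v_lshl_or_b32). A quick scalar check of the bit layout those patterns produce, assuming element 0 lands in the low 16 bits of the 32-bit register (illustrative only):

#include <cstdint>

// (build_vector lo, hi) -> (hi << 16) | (lo & 0xffff) in one 32-bit register.
constexpr uint32_t packV2I16(uint16_t Lo, uint16_t Hi) {
  return (uint32_t(Hi) << 16) | (uint32_t(Lo) & 0xffffu);
}
static_assert(packV2I16(0x1234, 0xabcd) == 0xabcd1234u,
              "element 0 occupies the low half");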
BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); - - return; } } // end of namespace llvm diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index 7518fd774a48..ae811b30434d 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -29,6 +29,8 @@ def ProcTinyCore: SubtargetFeature<"tinycore", "HexagonProcFamily", // Hexagon ISA Extensions def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true", "Hexagon ZReg extension instructions">; +def ExtensionHVXQFloat: SubtargetFeature<"hvx-qfloat", "UseHVXQFloatOps", + "true", "Hexagon HVX QFloating point instructions">; def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion", "Hexagon::ArchEnum::V60", "Hexagon HVX instructions">; @@ -52,6 +54,10 @@ def ExtensionHVXV68: SubtargetFeature<"hvxv68", "HexagonHVXVersion", "Hexagon::ArchEnum::V68", "Hexagon HVX instructions", [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67]>; +def ExtensionHVXV69: SubtargetFeature<"hvxv69", "HexagonHVXVersion", + "Hexagon::ArchEnum::V69", "Hexagon HVX instructions", + [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, + ExtensionHVXV67, ExtensionHVXV68]>; def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>; @@ -61,6 +67,9 @@ def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps", def ExtensionAudio: SubtargetFeature<"audio", "UseAudioOps", "true", "Hexagon Audio extension instructions">; +def ExtensionHVXIEEEFP: SubtargetFeature<"hvx-ieee-fp", "UseHVXIEEEFPOps", + "true", "Hexagon HVX IEEE floating point instructions">; + def FeatureCompound: SubtargetFeature<"compound", "UseCompound", "true", "Use compound instructions">; def FeaturePackets: SubtargetFeature<"packets", "UsePackets", "true", @@ -88,6 +97,8 @@ def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19", def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim", "NoreturnStackElim", "true", "Eliminate stack allocation in a noreturn function when possible">; +def FeatureCabac: SubtargetFeature<"cabac", "UseCabac", "false", + "Emit the CABAC instruction">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. 
@@ -112,6 +123,8 @@ def UseHVXV67 : Predicate<"HST->useHVXV67Ops()">, AssemblerPredicate<(all_of ExtensionHVXV67)>; def UseHVXV68 : Predicate<"HST->useHVXV68Ops()">, AssemblerPredicate<(all_of ExtensionHVXV68)>; +def UseHVXV69 : Predicate<"HST->useHVXV69Ops()">, + AssemblerPredicate<(all_of ExtensionHVXV69)>; def UseAudio : Predicate<"HST->useAudioOps()">, AssemblerPredicate<(all_of ExtensionAudio)>; def UseZReg : Predicate<"HST->useZRegOps()">, @@ -119,6 +132,11 @@ def UseZReg : Predicate<"HST->useZRegOps()">, def UseCompound : Predicate<"HST->useCompound()">; def HasPreV65 : Predicate<"HST->hasPreV65()">, AssemblerPredicate<(all_of FeaturePreV65)>; +def UseHVXIEEEFP : Predicate<"HST->useHVXIEEEFPOps()">, + AssemblerPredicate<(all_of ExtensionHVXIEEEFP)>; +def UseHVXQFloat : Predicate<"HST->useHVXQFloatOps()">, + AssemblerPredicate<(all_of ExtensionHVXQFloat)>; +def UseHVXFloatingPoint: Predicate<"HST->useHVXFloatingPoint()">; def HasMemNoShuf : Predicate<"HST->hasMemNoShuf()">, AssemblerPredicate<(all_of FeatureMemNoShuf)>; def UseUnsafeMath : Predicate<"HST->useUnsafeMath()">; @@ -127,6 +145,8 @@ def NotOptTinyCore : Predicate<"!HST->isTinyCore() ||" let RecomputePerFunction = 1; } def UseSmallData : Predicate<"HST->useSmallData()">; +def UseCabac : Predicate<"HST->useCabac()">, + AssemblerPredicate<(any_of FeatureCabac)>; def Hvx64: HwMode<"+hvx-length64b">; def Hvx128: HwMode<"+hvx-length128b">; @@ -299,7 +319,7 @@ def changeAddrMode_rr_ur: InstrMapping { let ValueCols = [["BaseLongOffset"]]; } -def changeAddrMode_ur_rr : InstrMapping { +def changeAddrMode_ur_rr: InstrMapping { let FilterClass = "ImmRegShl"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; let ColFields = ["addrMode"]; @@ -370,40 +390,55 @@ class Proc; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv5", HexagonModelV5, [ArchV5, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv55", HexagonModelV55, [ArchV5, ArchV55, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv60", HexagonModelV60, [ArchV5, ArchV55, ArchV60, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv62", HexagonModelV62, [ArchV5, ArchV55, ArchV60, ArchV62, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv65", HexagonModelV65, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv66", HexagonModelV66, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv67", HexagonModelV67, 
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv68", HexagonModelV68, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, ArchV68, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; +def : Proc<"hexagonv69", HexagonModelV69, + [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, + ArchV68, ArchV69, + FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; // Need to update the correct features for tiny core. // Disable NewValueJumps since the packetizer is unable to handle a packet with // a new value jump and another SLOT0 instruction. diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index 7a43a4440b2d..56174dc7e136 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -21,31 +21,32 @@ namespace llvm { namespace Hexagon { -enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68 }; +enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 }; -static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68}; +static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68, 69}; static constexpr ArrayRef ArchValsNum(ArchValsNumArray); -static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68" }; +static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68", "v69" }; static constexpr ArrayRef ArchValsText(ArchValsTextArray); -static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68" }; +static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68", "hexagonv69" }; static constexpr ArrayRef CpuValsText(CpuValsTextArray); -static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68" }; +static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68", "v69" }; static constexpr ArrayRef CpuNickText(CpuNickTextArray); static const std::map CpuTable{ - {"generic", Hexagon::ArchEnum::V5}, - {"hexagonv5", Hexagon::ArchEnum::V5}, - {"hexagonv55", Hexagon::ArchEnum::V55}, - {"hexagonv60", Hexagon::ArchEnum::V60}, - {"hexagonv62", Hexagon::ArchEnum::V62}, - {"hexagonv65", Hexagon::ArchEnum::V65}, - {"hexagonv66", Hexagon::ArchEnum::V66}, - {"hexagonv67", Hexagon::ArchEnum::V67}, - {"hexagonv67t", Hexagon::ArchEnum::V67}, - {"hexagonv68", Hexagon::ArchEnum::V68}, + {"generic", Hexagon::ArchEnum::V5}, + {"hexagonv5", Hexagon::ArchEnum::V5}, + {"hexagonv55", Hexagon::ArchEnum::V55}, + {"hexagonv60", Hexagon::ArchEnum::V60}, + {"hexagonv62", Hexagon::ArchEnum::V62}, + {"hexagonv65", Hexagon::ArchEnum::V65}, + {"hexagonv66", Hexagon::ArchEnum::V66}, + {"hexagonv67", Hexagon::ArchEnum::V67}, + {"hexagonv67t", 
Hexagon::ArchEnum::V67}, + {"hexagonv68", Hexagon::ArchEnum::V68}, + {"hexagonv69", Hexagon::ArchEnum::V69}, }; static const std::map ElfFlagsByCpuStr = { @@ -59,6 +60,7 @@ static const std::map ElfFlagsByCpuStr = { {"hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67}, {"hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T}, {"hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68}, + {"hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69}, }; static const std::map ElfArchByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V5, "V5"}, @@ -70,6 +72,7 @@ static const std::map ElfArchByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V67, "V67"}, {llvm::ELF::EF_HEXAGON_MACH_V67T, "V67T"}, {llvm::ELF::EF_HEXAGON_MACH_V68, "V68"}, + {llvm::ELF::EF_HEXAGON_MACH_V69, "V69"}, }; static const std::map ElfCpuByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V5, "hexagonv5"}, @@ -81,6 +84,7 @@ static const std::map ElfCpuByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V67, "hexagonv67"}, {llvm::ELF::EF_HEXAGON_MACH_V67T, "hexagonv67t"}, {llvm::ELF::EF_HEXAGON_MACH_V68, "hexagonv68"}, + {llvm::ELF::EF_HEXAGON_MACH_V69, "hexagonv69"}, }; } // namespace Hexagon diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td index e743a291f1e5..e4f24e3c2e66 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.td +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td @@ -24,3 +24,5 @@ def ArchV67: SubtargetFeature<"v67", "HexagonArchVersion", "Hexagon::ArchEnum::V def HasV67 : Predicate<"HST->hasV67Ops()">, AssemblerPredicate<(all_of ArchV67)>; def ArchV68: SubtargetFeature<"v68", "HexagonArchVersion", "Hexagon::ArchEnum::V68", "Enable Hexagon V68 architecture">; def HasV68 : Predicate<"HST->hasV68Ops()">, AssemblerPredicate<(all_of ArchV68)>; +def ArchV69: SubtargetFeature<"v69", "HexagonArchVersion", "Hexagon::ArchEnum::V69", "Enable Hexagon V69 architecture">; +def HasV69 : Predicate<"HST->hasV69Ops()">, AssemblerPredicate<(all_of ArchV69)>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc index 40f6e14aed13..7164af3ad5c6 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc +++ b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc @@ -8,6 +8,7 @@ // Automatically generated file, do not edit! 
//===----------------------------------------------------------------------===// + #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-function" diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index a1db3ae7239d..d195df918293 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -11,6 +11,7 @@ def tc_04da405a : InstrItinClass; def tc_05ca8cfd : InstrItinClass; def tc_08a4f1b6 : InstrItinClass; +def tc_0afc8be9 : InstrItinClass; def tc_0b04c6c7 : InstrItinClass; def tc_0ec46cf9 : InstrItinClass; def tc_131f1c81 : InstrItinClass; @@ -21,6 +22,7 @@ def tc_191381c1 : InstrItinClass; def tc_1ad8a370 : InstrItinClass; def tc_1ba8a0cd : InstrItinClass; def tc_20a4bbec : InstrItinClass; +def tc_2120355e : InstrItinClass; def tc_257f6f7c : InstrItinClass; def tc_26a377fe : InstrItinClass; def tc_2b4c548e : InstrItinClass; @@ -28,15 +30,18 @@ def tc_2c745bb8 : InstrItinClass; def tc_2d4051cd : InstrItinClass; def tc_2e8f5f6e : InstrItinClass; def tc_309dbb4f : InstrItinClass; +def tc_37820f4c : InstrItinClass; def tc_3904b926 : InstrItinClass; def tc_3aacf4a8 : InstrItinClass; def tc_3ad719fb : InstrItinClass; def tc_3c56e5ce : InstrItinClass; +def tc_3c8c15d0 : InstrItinClass; def tc_3ce09744 : InstrItinClass; def tc_3e2aaafc : InstrItinClass; def tc_447d9895 : InstrItinClass; def tc_453fe68d : InstrItinClass; def tc_46d6c3e0 : InstrItinClass; +def tc_4942646a : InstrItinClass; def tc_51d0ecc3 : InstrItinClass; def tc_52447ecc : InstrItinClass; def tc_540c3da3 : InstrItinClass; @@ -46,6 +51,7 @@ def tc_56c4f9fe : InstrItinClass; def tc_56e64202 : InstrItinClass; def tc_58d21193 : InstrItinClass; def tc_5bf8afbb : InstrItinClass; +def tc_5cdf8c84 : InstrItinClass; def tc_61bf7c03 : InstrItinClass; def tc_649072c2 : InstrItinClass; def tc_660769f1 : InstrItinClass; @@ -57,6 +63,8 @@ def tc_71646d06 : InstrItinClass; def tc_7177e272 : InstrItinClass; def tc_718b5c53 : InstrItinClass; def tc_7273323b : InstrItinClass; +def tc_72e2b393 : InstrItinClass; +def tc_73efe966 : InstrItinClass; def tc_7417e785 : InstrItinClass; def tc_767c4e9d : InstrItinClass; def tc_7d68d5c2 : InstrItinClass; @@ -71,9 +79,11 @@ def tc_9d1dc972 : InstrItinClass; def tc_9f363d21 : InstrItinClass; def tc_a02a10a8 : InstrItinClass; def tc_a0dbea28 : InstrItinClass; +def tc_a19b9305 : InstrItinClass; def tc_a28f32b5 : InstrItinClass; def tc_a69eeee1 : InstrItinClass; def tc_a7e6707d : InstrItinClass; +def tc_aa047364 : InstrItinClass; def tc_ab23f776 : InstrItinClass; def tc_abe8c3b2 : InstrItinClass; def tc_ac4046bc : InstrItinClass; @@ -89,8 +99,10 @@ def tc_c4edf264 : InstrItinClass; def tc_c5dba46e : InstrItinClass; def tc_c7039829 : InstrItinClass; def tc_cd94bfe0 : InstrItinClass; +def tc_cda936da : InstrItinClass; def tc_d8287c14 : InstrItinClass; def tc_db5555f3 : InstrItinClass; +def tc_dcca380f : InstrItinClass; def tc_dd5b0695 : InstrItinClass; def tc_df80eeb0 : InstrItinClass; def tc_e2d2e9e5 : InstrItinClass; @@ -99,6 +111,7 @@ def tc_e3f68a46 : InstrItinClass; def tc_e675c45a : InstrItinClass; def tc_e699ae41 : InstrItinClass; def tc_e99d4c2e : InstrItinClass; +def tc_f175e046 : InstrItinClass; def tc_f1de44ef : InstrItinClass; def tc_f21e8abb : InstrItinClass; @@ -119,6 +132,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + 
InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -174,6 +192,10 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -209,6 +231,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -231,6 +258,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -259,6 +291,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -306,6 +343,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -363,6 +405,16 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -437,6 +489,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -456,6 +513,10 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -537,6 +598,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -547,6 +613,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -589,6 +660,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -620,6 +696,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -675,6 +756,10 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 
7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -710,6 +795,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -732,6 +822,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -760,6 +855,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -807,6 +907,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -864,6 +969,16 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -938,6 +1053,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -957,6 +1077,10 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1038,6 +1162,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -1048,6 +1177,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -1090,6 +1224,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1121,6 +1260,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1176,6 +1320,10 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -1211,6 +1359,11 @@ class DepHVXItinV62 { InstrStage<1, 
[CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -1233,6 +1386,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -1261,6 +1419,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -1308,6 +1471,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -1365,6 +1533,16 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -1439,6 +1617,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -1458,6 +1641,10 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1539,6 +1726,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -1549,6 +1741,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -1591,6 +1788,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1622,6 +1824,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1677,6 +1884,10 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -1712,6 +1923,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, 
HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -1734,6 +1950,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -1762,6 +1983,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -1809,6 +2035,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -1866,6 +2097,16 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -1940,6 +2181,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -1959,6 +2205,10 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2040,6 +2290,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -2050,6 +2305,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -2092,6 +2352,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2123,6 +2388,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2178,6 +2448,10 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -2213,6 +2487,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -2235,6 +2514,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -2263,6 +2547,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -2310,6 +2599,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -2367,6 +2661,16 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -2441,6 +2745,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -2460,6 +2769,10 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2541,6 +2854,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -2551,6 +2869,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -2593,6 +2916,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2624,6 +2952,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2679,6 +3012,10 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -2714,6 +3051,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -2736,6 +3078,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 
2], @@ -2764,6 +3111,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -2811,6 +3163,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -2868,6 +3225,16 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -2942,6 +3309,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -2961,6 +3333,10 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -3042,6 +3418,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -3052,6 +3433,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -3094,6 +3480,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -3125,6 +3516,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -3180,6 +3576,10 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -3215,6 +3615,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -3237,7 +3642,576 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + 
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, 
CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + 
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV69 { + list DepHVXItinV69_list = [ + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3265,6 +4239,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -3312,6 +4291,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -3369,6 +4353,16 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -3443,6 +4437,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -3462,6 +4461,10 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -3543,6 +4546,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -3553,6 +4561,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 
2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -3595,6 +4608,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td index a3766652794b..a979bafe8e33 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -7338,3 +7338,771 @@ class DepScalarItinV68 { [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> ]; } + +class DepScalarItinV69 { + list DepScalarItinV69_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + 
[Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 
3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, 
+ + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td index b3f1b6638193..65d36924ba48 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td @@ -2288,6 +2288,12 @@ class Enc_a30110 : OpcodeHexagon { bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; } +class Enc_a33d04 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} class Enc_a42857 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -3109,6 +3115,14 @@ class Enc_de0214 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_de5ea0 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} class Enc_e07374 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index 4f00409c336c..c02988266584 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -5824,8 +5824,8 @@ let Inst{31-21} = 0b01010100100; let hasNewValue = 1; let opNewValue = 0; let isSolo = 1; -let Uses = [GOSP]; -let Defs = [GOSP, PC]; +let Uses = [CCR, GOSP]; +let Defs = [CCR, GOSP, PC]; let hasSideEffects = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -8500,6 +8500,8 @@ let Inst{31-21} = 
0b01010010101; let isTerminator = 1; let isIndirectBranch = 1; let isBranch = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let cofMax1 = 1; } def J4_jumpseti : HInst< @@ -18210,16 +18212,6 @@ let opExtentBits = 18; let opExtentAlign = 2; let opNewValue = 1; } -def PS_trap1 : HInst< -(outs), -(ins u8_0Imm:$Ii), -"trap1(#$Ii)", -tc_53c851ab, TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> { -let Inst{1-0} = 0b00; -let Inst{7-5} = 0b000; -let Inst{13-13} = 0b0; -let Inst{31-16} = 0b0101010010000000; -} def R6_release_at_vi : HInst< (outs), (ins IntRegs:$Rs32), @@ -18964,7 +18956,7 @@ def S2_cabacdecbin : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = decbin($Rss32,$Rtt32)", -tc_db596beb, TypeS_3op>, Enc_a56825 { +tc_db596beb, TypeS_3op>, Enc_a56825, Requires<[UseCabac]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -26883,17 +26875,6 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } -def V6_ldntnt0 : HInst< -(outs HvxVR:$Vd32), -(ins IntRegs:$Rt32), -"$Vd32 = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62]> { -let hasNewValue = 1; -let opNewValue = 0; -let isPseudo = 1; -let isCodeGenOnly = 1; -let DecoderNamespace = "EXT_mmvec"; -} def V6_ldp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), @@ -27312,6 +27293,30 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_v10mpyubs10 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxWR:$Vuu32, HvxWR:$Vvv32, u1_0Imm:$Ii), +"$Vdd32.w = v10mpy($Vuu32.ub,$Vvv32.b,#$Ii)", +tc_f175e046, TypeCVI_VX>, Requires<[UseHVXV69]> { +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isPseudo = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_v10mpyubs10_vxx : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, HvxWR:$Vvv32, u1_0Imm:$Ii), +"$Vxx32.w += v10mpy($Vuu32.ub,$Vvv32.b,#$Ii)", +tc_4942646a, TypeCVI_VX>, Requires<[UseHVXV69]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let isPseudo = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} def V6_v6mpyhubs10 : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32, u2_0Imm:$Ii), @@ -27396,7 +27401,7 @@ def V6_vL32Ub_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmemu($Rt32+#$Ii)", -tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> { +tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]>, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27408,13 +27413,15 @@ let isCVLoad = 1; let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32Ub_ai"; +let CextOpcode = "V6_vL32Ub"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32Ub_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmemu($Rx32++#$Ii)", -tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> { +tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]>, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27427,6 +27434,7 @@ let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; +let CextOpcode = "V6_vL32Ub"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27452,7 +27460,7 @@ def V6_vL32b_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii)", 
-tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27465,6 +27473,7 @@ let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; +let CextOpcode = "V6_vL32b"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -27473,7 +27482,7 @@ def V6_vL32b_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii)", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27487,6 +27496,7 @@ let CVINew = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ai"; +let CextOpcode = "V6_vL32b_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -27560,7 +27570,7 @@ def V6_vL32b_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii)", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27574,6 +27584,7 @@ let CVINew = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; +let CextOpcode = "V6_vL32b_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27729,7 +27740,7 @@ def V6_vL32b_nt_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii):nt", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27743,6 +27754,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_ai"; +let CextOpcode = "V6_vL32b_nt"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -27751,7 +27763,7 @@ def V6_vL32b_nt_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27766,6 +27778,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ai"; +let CextOpcode = "V6_vL32b_nt_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -27842,7 +27855,7 @@ def V6_vL32b_nt_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27857,6 +27870,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_pi"; +let CextOpcode 
= "V6_vL32b_nt_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28019,7 +28033,7 @@ def V6_vL32b_nt_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii):nt", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -28033,6 +28047,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_pi"; +let CextOpcode = "V6_vL32b_nt"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28127,7 +28142,7 @@ def V6_vL32b_nt_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -28137,11 +28152,12 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ai"; +let CextOpcode = "V6_vL32b_nt_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -28160,7 +28176,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28183,7 +28199,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28206,7 +28222,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28218,7 +28234,7 @@ def V6_vL32b_nt_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -28228,11 +28244,12 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_pi"; +let CextOpcode = "V6_vL32b_nt_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28250,7 +28267,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28273,7 +28290,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28295,7 +28312,7 @@ let addrMode = PostInc; let 
accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28317,7 +28334,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28329,7 +28346,7 @@ def V6_vL32b_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii)", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28342,6 +28359,7 @@ let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; +let CextOpcode = "V6_vL32b"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28432,7 +28450,7 @@ def V6_vL32b_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii)", -tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -28442,10 +28460,11 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; +let CextOpcode = "V6_vL32b_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -28464,7 +28483,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; @@ -28486,7 +28505,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; @@ -28508,7 +28527,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; @@ -28519,7 +28538,7 @@ def V6_vL32b_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii)", -tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28529,10 +28548,11 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; +let CextOpcode = "V6_vL32b_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28550,7 +28570,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; @@ -28572,7 +28592,7 @@ let addrMode 
= BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; @@ -28593,7 +28613,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; @@ -28614,7 +28634,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; @@ -28625,7 +28645,7 @@ def V6_vS32Ub_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmemu($Rt32+#$Ii) = $Vs32", -tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { +tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28634,6 +28654,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32Ub_ai"; +let CextOpcode = "V6_vS32Ub"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -28692,7 +28713,7 @@ def V6_vS32Ub_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmemu($Rx32++#$Ii) = $Vs32", -tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { +tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28701,6 +28722,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32Ub_pi"; +let CextOpcode = "V6_vS32Ub"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28773,7 +28795,7 @@ def V6_vS32b_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rt32+#$Ii) = $Vs32", -tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { +tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28782,6 +28804,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28790,7 +28813,7 @@ def V6_vS32b_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "vmem($Rt32+#$Ii) = $Os8.new", -tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel { +tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28802,6 +28825,7 @@ let CVINew = 1; let isNewValue = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 2; @@ -28873,7 +28897,7 @@ def V6_vS32b_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "vmem($Rx32++#$Ii) = $Os8.new", -tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel { +tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 
0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28885,6 +28909,7 @@ let CVINew = 1; let isNewValue = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29070,7 +29095,7 @@ def V6_vS32b_nt_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rt32+#$Ii):nt = $Vs32", -tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { +tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; @@ -29080,6 +29105,7 @@ let isCVI = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b_nt"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29088,7 +29114,7 @@ def V6_vS32b_nt_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "vmem($Rt32+#$Ii):nt = $Os8.new", -tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel { +tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; @@ -29101,6 +29127,7 @@ let isNewValue = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b_nt_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 2; @@ -29175,7 +29202,7 @@ def V6_vS32b_nt_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "vmem($Rx32++#$Ii):nt = $Os8.new", -tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel { +tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; @@ -29188,6 +29215,7 @@ let isNewValue = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b_nt_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29383,7 +29411,7 @@ def V6_vS32b_nt_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rx32++#$Ii):nt = $Vs32", -tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { +tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; @@ -29393,6 +29421,7 @@ let isCVI = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b_nt"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29519,7 +29548,7 @@ def V6_vS32b_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rx32++#$Ii) = $Vs32", -tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { +tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -29528,6 +29557,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29689,6 +29719,32 @@ let mayStore = 1; let DecoderNamespace = 
"EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vabs_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vabs($Vu32.hf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = vabs($Vu32.sf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vabsb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -29975,6 +30031,123 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vadd_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vadd($Vu32.hf,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_hf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vadd($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vadd($Vu32.qf16,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf16_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vadd($Vu32.qf16,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vadd($Vu32.qf32,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf32_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vadd($Vu32.qf32,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vadd($Vu32.sf,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 
0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vadd($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_sf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vadd($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vaddb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -31440,6 +31613,58 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vasrvuhubrndsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.ub = vasr($Vuu32.uh,$Vv32.ub):rnd:sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasrvuhubsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.ub = vasr($Vuu32.uh,$Vv32.ub):sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasrvwuhrndsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.uh = vasr($Vuu32.w,$Vv32.uh):rnd:sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasrvwuhsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.uh = vasr($Vuu32.w,$Vv32.uh):sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vasrw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), @@ -31597,6 +31822,33 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vassign_fp : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vfmv($Vu32.w)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vassign_tmp : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.tmp = $Vu32", +tc_2120355e, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b0; +let Inst{31-16} = 0b0001111000000001; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let hasHvxTmp = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vassignp : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32), @@ -32000,6 +32252,189 @@ let isCVI = 1; let isRegSequence = 1; let DecoderNamespace = 
"EXT_mmvec"; } +def V6_vcombine_tmp : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.tmp = vcombine($Vu32,$Vv32)", +tc_aa047364, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011110101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let hasHvxTmp = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_hf_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = $Vu32.qf16", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_hf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32), +"$Vd32.hf = $Vuu32.qf32", +tc_51d0ecc3, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_sf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = $Vu32.qf32", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_b_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.b = vcvt($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_h_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = vcvt($Vu32.hf)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_b : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.hf = vcvt($Vu32.b)", +tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_h : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vcvt($Vu32.h)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vcvt($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_ub : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.hf = vcvt($Vu32.ub)", +tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let 
Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_uh : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vcvt($Vu32.uh)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.sf = vcvt($Vu32.hf)", +tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_ub_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.ub = vcvt($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_uh_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.uh = vcvt($Vu32.hf)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vd0 : HInst< (outs HvxVR:$Vd32), (ins), @@ -32141,6 +32576,34 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vdmpy_sf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vdmpy($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vdmpy_sf_hf_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vx32.sf += vdmpy($Vu32.hf,$Vv32.hf)", +tc_a19b9305, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100010; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vdmpybus : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), @@ -32415,7 +32878,7 @@ def V6_vdmpyhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -32428,7 +32891,7 @@ def V6_vdmpyhsat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat", -tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { +tc_72e2b393, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -32523,7 +32986,7 @@ def V6_vdmpyhsusat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat", -tc_0b04c6c7, 
TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -32536,7 +32999,7 @@ def V6_vdmpyhsusat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat", -tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { +tc_72e2b393, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -32577,7 +33040,7 @@ def V6_vdmpyhvsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat", -tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { +tc_73efe966, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -32831,6 +33294,84 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } +def V6_vfmax_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vfmax($Vu32.hf,$Vv32.hf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfmax_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vfmax($Vu32.sf,$Vv32.sf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfmin_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vfmin($Vu32.hf,$Vv32.hf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfmin_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vfmin($Vu32.sf,$Vv32.sf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfneg_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vfneg($Vu32.hf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfneg_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = vfneg($Vu32.sf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vgathermh : HInst< (outs), (ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32), @@ -32843,7 +33384,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let 
DecoderNamespace = "EXT_mmvec"; @@ -32860,7 +33400,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32877,7 +33416,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32894,7 +33432,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32911,7 +33448,6 @@ let opNewValue = 0; let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32928,7 +33464,6 @@ let opNewValue = 0; let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33033,6 +33568,106 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } +def V6_vgthf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b011101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgthf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b110011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgthf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b001101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgthf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b111011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtsf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b011100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgtsf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b110010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtsf_or : HInst< +(outs HvxQR:$Qx4), 
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b001100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtsf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b111010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} def V6_vgtub : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -33552,6 +34187,32 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmax_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vmax($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmax_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vmax($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmaxb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -33677,6 +34338,32 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmin_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vmin($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmin_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vmin($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vminb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34110,6 +34797,179 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } +def V6_vmpy_hf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vmpy($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_hf_hf_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vx32.hf += vmpy($Vu32.hf,$Vv32.hf)", +tc_a19b9305, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100010; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let 
isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} +def V6_vmpy_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vmpy($Vu32.qf16,$Vv32.qf16)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vmpy($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf16_mix_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vmpy($Vu32.qf16,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vmpy($Vu32.qf32,$Vv32.qf32)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.qf32 = vmpy($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_mix_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.qf32 = vmpy($Vu32.qf16,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_qf16 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.qf32 = vmpy($Vu32.qf16,$Vv32.qf16)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vmpy($Vu32.sf,$Vv32.sf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vmpy($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let 
DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_sf_hf_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vxx32.sf += vmpy($Vu32.hf,$Vv32.hf)", +tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100010; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vmpy_sf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vmpy($Vu32.sf,$Vv32.sf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmpybus : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), @@ -34397,7 +35257,7 @@ def V6_vmpyhsrs : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -34422,7 +35282,7 @@ def V6_vmpyhss : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -34555,7 +35415,7 @@ def V6_vmpyhvsrs : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat", -tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { +tc_73efe966, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -35332,6 +36192,19 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpyuhvs : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.uh = vmpy($Vu32.uh,$Vv32.uh):>>16", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmux : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32), @@ -36007,7 +36880,7 @@ def V6_vrmpybusv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)", -tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { +tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -36061,7 +36934,7 @@ def V6_vrmpybv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vrmpy($Vu32.b,$Vv32.b)", -tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { +tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -36277,7 +37150,7 @@ def V6_vrmpyubv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)", -tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { 
+tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -37412,6 +38285,123 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_hf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vsub($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.qf16,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf16_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.qf16,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.qf32,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf32_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.qf32,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vsub($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_sf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vsub($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let 
Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsubb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -38647,7 +39637,7 @@ def V6_zLd_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "z = vmem($Rt32+#$Ii)", -tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]> { +tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]>, PostInc_BaseImm { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101100000; @@ -38655,13 +39645,14 @@ let addrMode = BaseImmOffset; let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; +let CextOpcode = "V6_zLd"; let DecoderNamespace = "EXT_mmvec"; } def V6_zLd_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "z = vmem($Rx32++#$Ii)", -tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]> { +tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]>, PostInc_BaseImm { let Inst{7-0} = 0b00000000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101101000; @@ -38669,6 +39660,7 @@ let addrMode = PostInc; let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; +let CextOpcode = "V6_zLd"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -38782,6 +39774,17 @@ let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b0110110000100000; let isSolo = 1; } +def Y2_crswap_old : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in), +"crswap($Rx32,sgp)", +PSEUDO, TypeMAPPING> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let Constraints = "$Rx32 = $Rx32in"; +} def Y2_dccleana : HInst< (outs), (ins IntRegs:$Rs32), @@ -38861,6 +39864,22 @@ let Inst{13-0} = 0b00000000000010; let Inst{31-16} = 0b0101011111000000; let isSolo = 1; } +def Y2_k1lock_map : HInst< +(outs), +(ins), +"k1lock", +PSEUDO, TypeMAPPING>, Requires<[HasV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def Y2_k1unlock_map : HInst< +(outs), +(ins), +"k1unlock", +PSEUDO, TypeMAPPING>, Requires<[HasV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} def Y2_syncht : HInst< (outs), (ins), @@ -39083,7 +40102,7 @@ def dup_A2_add : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", -tc_388f9897, TypeALU32_3op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_3op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39093,7 +40112,7 @@ def dup_A2_addi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,#$Ii)", -tc_388f9897, TypeALU32_ADDI>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_ADDI>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39108,7 +40127,7 @@ def dup_A2_andir : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = and($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39123,7 +40142,7 @@ def dup_A2_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, s8_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39136,7 +40155,7 @@ def dup_A2_sxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = 
sxtb($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39146,7 +40165,7 @@ def dup_A2_sxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39156,7 +40175,7 @@ def dup_A2_tfr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = $Rs32", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39166,7 +40185,7 @@ def dup_A2_tfrsi : HInst< (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii), "$Rd32 = #$Ii", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39181,7 +40200,7 @@ def dup_A2_zxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxtb($Rs32)", -PSEUDO, TypeMAPPING>, Requires<[HasV68]> { +PSEUDO, TypeMAPPING>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39191,7 +40210,7 @@ def dup_A2_zxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39201,7 +40220,7 @@ def dup_A4_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39214,7 +40233,7 @@ def dup_A4_combineir : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rdd32 = combine(#$Ii,$Rs32)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39227,7 +40246,7 @@ def dup_A4_combineri : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rdd32 = combine($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39240,7 +40259,7 @@ def dup_C2_cmoveif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -39257,7 +40276,7 @@ def dup_C2_cmoveit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -39273,7 +40292,7 @@ def dup_C2_cmovenewif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV68]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -39291,7 +40310,7 @@ 
def dup_C2_cmovenewit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV68]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -39308,7 +40327,7 @@ def dup_C2_cmpeqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.eq($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39321,7 +40340,7 @@ def dup_L2_deallocframe : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = deallocframe($Rs32):raw", -tc_aee6250c, TypeLD>, Requires<[HasV68]> { +tc_aee6250c, TypeLD>, Requires<[HasV69]> { let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; let mayLoad = 1; @@ -39333,7 +40352,7 @@ def dup_L2_loadrb_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memb($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39351,7 +40370,7 @@ def dup_L2_loadrd_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s29_3Imm:$Ii), "$Rdd32 = memd($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -39367,7 +40386,7 @@ def dup_L2_loadrh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39385,7 +40404,7 @@ def dup_L2_loadri_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rd32 = memw($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39403,7 +40422,7 @@ def dup_L2_loadrub_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memub($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39421,7 +40440,7 @@ def dup_L2_loadruh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memuh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39439,7 +40458,7 @@ def dup_S2_allocframe : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u11_3Imm:$Ii), "allocframe($Rx32,#$Ii):raw", -tc_74a42bda, TypeST>, Requires<[HasV68]> { +tc_74a42bda, TypeST>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39455,7 +40474,7 @@ def dup_S2_storerb_io : HInst< (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -39471,7 +40490,7 @@ def dup_S2_storerd_io : HInst< (outs), (ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+#$Ii) = $Rtt32", -tc_a9edeffa, TypeST>, 
Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -39487,7 +40506,7 @@ def dup_S2_storerh_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let AsmVariantName = "NonParsable"; @@ -39503,7 +40522,7 @@ def dup_S2_storeri_io : HInst< (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; @@ -39519,7 +40538,7 @@ def dup_S4_storeirb_io : HInst< (outs), (ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "memb($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV68]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -39535,7 +40554,7 @@ def dup_S4_storeiri_io : HInst< (outs), (ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "memw($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV68]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index e5c78d122c9e..64bc5091d1d1 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -1661,8 +1661,6 @@ def: Pat<(int_hexagon_Y2_dccleana IntRegs:$src1), (Y2_dccleana IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_Y2_dccleaninva IntRegs:$src1), (Y2_dccleaninva IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_Y2_dcfetch IntRegs:$src1), - (Y2_dcfetch IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_Y2_dcinva IntRegs:$src1), (Y2_dcinva IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_Y2_dczeroa IntRegs:$src1), @@ -3380,3 +3378,294 @@ def: Pat<(int_hexagon_V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, (V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV68, UseHVX64B]>; def: Pat<(int_hexagon_V6_v6mpyvubs10_vxx_128B HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4), (V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vabs_hf HvxVR:$src1), + (V6_vabs_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vabs_hf_128B HvxVR:$src1), + (V6_vabs_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vabs_sf HvxVR:$src1), + (V6_vabs_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vabs_sf_128B HvxVR:$src1), + (V6_vabs_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vadd_hf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; 
+def: Pat<(int_hexagon_V6_vadd_hf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf16_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_sf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_sf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vassign_fp HvxVR:$src1), + (V6_vassign_fp HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vassign_fp_128B HvxVR:$src1), + (V6_vassign_fp HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf16 HvxVR:$src1), + (V6_vconv_hf_qf16 HvxVR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf16_128B HvxVR:$src1), + (V6_vconv_hf_qf16 HvxVR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf32 HvxWR:$src1), + (V6_vconv_hf_qf32 HvxWR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf32_128B HvxWR:$src1), + (V6_vconv_hf_qf32 HvxWR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_sf_qf32 HvxVR:$src1), + (V6_vconv_sf_qf32 HvxVR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_sf_qf32_128B HvxVR:$src1), + (V6_vconv_sf_qf32 HvxVR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_b_hf_128B HvxVR:$src1, HvxVR:$src2), 
+ (V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_h_hf HvxVR:$src1), + (V6_vcvt_h_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_h_hf_128B HvxVR:$src1), + (V6_vcvt_h_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_b HvxVR:$src1), + (V6_vcvt_hf_b HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_b_128B HvxVR:$src1), + (V6_vcvt_hf_b HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_h HvxVR:$src1), + (V6_vcvt_hf_h HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_h_128B HvxVR:$src1), + (V6_vcvt_hf_h HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_ub HvxVR:$src1), + (V6_vcvt_hf_ub HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_ub_128B HvxVR:$src1), + (V6_vcvt_hf_ub HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_uh HvxVR:$src1), + (V6_vcvt_hf_uh HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_uh_128B HvxVR:$src1), + (V6_vcvt_hf_uh HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_sf_hf HvxVR:$src1), + (V6_vcvt_sf_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_sf_hf_128B HvxVR:$src1), + (V6_vcvt_sf_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_ub_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_uh_hf HvxVR:$src1), + (V6_vcvt_uh_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_uh_hf_128B HvxVR:$src1), + (V6_vcvt_uh_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmax_hf HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmax_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmax_sf HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmax_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmin_hf HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_hf 
HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmin_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmin_sf HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmin_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfneg_hf HvxVR:$src1), + (V6_vfneg_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfneg_hf_128B HvxVR:$src1), + (V6_vfneg_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfneg_sf HvxVR:$src1), + (V6_vfneg_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfneg_sf_128B HvxVR:$src1), + (V6_vfneg_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vgthf HvxVR:$src1, HvxVR:$src2), + (V6_vgthf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vgthf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf HvxVR:$src1, HvxVR:$src2), + (V6_vgtsf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vgtsf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_xor_128B HvxQR:$src1, 
HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_mix_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, 
UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_mix_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_hf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_hf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf16_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2), + 
(V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_sf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; + +// V69 HVX Instructions. + +def: Pat<(int_hexagon_V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvuhubrndsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvuhubsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvwuhrndsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvwuhsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2), + (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpyuhvs_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/llvm/lib/Target/Hexagon/HexagonDepMappings.td index 919cb996ad15..2f7b76b893a9 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMappings.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMappings.td @@ -174,7 +174,6 @@ def V6_ldcpnt0Alias : InstAlias<"if ($Pv4) $Vd32.cur = vmem($Rt32):nt", (V6_vL32 def V6_ldnp0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; def V6_ldnpnt0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; def V6_ldnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; -def V6_ldntnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>; def V6_ldp0Alias : 
InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_ldpnt0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_ldtnp0Alias : InstAlias<"if (!$Pv4) $Vd32.tmp = vmem($Rt32)", (V6_vL32b_npred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td index 45adaf50774f..898ef51bd48f 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -146,9 +146,6 @@ class InstHexagon pattern, bits<1> isFP = 0; let TSFlags {50} = isFP; // Floating-point. - bits<1> isSomeOK = 0; - let TSFlags {51} = isSomeOK; // Relax some grouping constraints. - bits<1> hasNewValue2 = 0; let TSFlags{52} = hasNewValue2; // Second New-value producer insn. bits<3> opNewValue2 = 0; @@ -160,8 +157,8 @@ class InstHexagon pattern, bits<1> prefersSlot3 = 0; let TSFlags{57} = prefersSlot3; // Complex XU - bits<1> hasTmpDst = 0; - let TSFlags{60} = hasTmpDst; // v65 : 'fake" register VTMP is set + bits<1> hasHvxTmp = 0; + let TSFlags{60} = hasHvxTmp; // vector register vX.tmp false-write bit CVINew = 0; let TSFlags{62} = CVINew; diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td index 11f8af7c41a0..afd63d6d4aa7 100644 --- a/llvm/lib/Target/Hexagon/HexagonPseudo.td +++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td @@ -572,3 +572,14 @@ defm PS_storerd : NewCircularStore; // __builtin_trap. let hasSideEffects = 1, isPseudo = 1, isCodeGenOnly = 1, isSolo = 1 in def PS_crash: InstHexagon<(outs), (ins), "", [], "", PSEUDO, TypePSEUDO>; + +// This is actual trap1 instruction from before v65. It's here since it is +// no longer included in DepInstrInfo.td. +def PS_trap1 : HInst<(outs), (ins u8_0Imm:$Ii), "trap1(#$Ii)", tc_53c851ab, + TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> { + let Inst{1-0} = 0b00; + let Inst{7-5} = 0b000; + let Inst{13-13} = 0b0; + let Inst{31-16} = 0b0101010010000000; +} + diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td index 88d775f16a7f..931578c9e78d 100644 --- a/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -69,3 +69,4 @@ include "HexagonScheduleV66.td" include "HexagonScheduleV67.td" include "HexagonScheduleV67T.td" include "HexagonScheduleV68.td" +include "HexagonScheduleV69.td" diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV69.td b/llvm/lib/Target/Hexagon/HexagonScheduleV69.td new file mode 100644 index 000000000000..ddd246866e20 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonScheduleV69.td @@ -0,0 +1,40 @@ +//=-HexagonScheduleV69.td - HexagonV69 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries +// still used by a handful of instructions. Hopefully, we will be able +// to get rid of them soon. 
+def HexagonV69ItinList : DepScalarItinV69, ScalarItin, + DepHVXItinV69, HVXItin, PseudoItin { + list ItinList = + !listconcat(DepScalarItinV69_list, ScalarItin_list, + DepHVXItinV69_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV69 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV69ItinList.ItinList>; + +def HexagonModelV69 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV69; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V69 Resource Definitions - +//===----------------------------------------------------------------------===// + diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index ecb2f88d8096..08bb4580b585 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -75,6 +75,10 @@ static cl::opt EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable checking for cache bank conflicts")); +static cl::opt EnableV68FloatCodeGen( + "force-hvx-float", cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable the code-generation for vector float instructions on v68.")); + HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), @@ -103,13 +107,71 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { UseAudioOps = false; UseLongCalls = false; - UseBSBScheduling = hasV60Ops() && EnableBSBSched; + SubtargetFeatures Features(FS); - ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); + // Turn on QFloat if the HVX version is v68+. + // The function ParseSubtargetFeatures will set feature bits and initialize + // subtarget's variables all in one, so there isn't a good way to preprocess + // the feature string, other than by tinkering with it directly. + auto IsQFloatFS = [](StringRef F) { + return F == "+hvx-qfloat" || F == "-hvx-qfloat"; + }; + if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) { + auto getHvxVersion = [&Features](StringRef FS) -> StringRef { + for (StringRef F : llvm::reverse(Features.getFeatures())) { + if (F.startswith("+hvxv")) + return F; + } + for (StringRef F : llvm::reverse(Features.getFeatures())) { + if (F == "-hvx") + return StringRef(); + if (F.startswith("+hvx") || F == "-hvx") + return F.take_front(4); // Return "+hvx" or "-hvx". + } + return StringRef(); + }; + + bool AddQFloat = false; + StringRef HvxVer = getHvxVersion(FS); + if (HvxVer.startswith("+hvxv")) { + int Ver = 0; + if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68) + AddQFloat = true; + } else if (HvxVer == "+hvx") { + if (hasV68Ops()) + AddQFloat = true; + } + + if (AddQFloat) + Features.AddFeature("+hvx-qfloat"); + } + + std::string FeatureString = Features.getString(); + ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString); + + // Enable float code generation only if the flag(s) are set and + // the feature is enabled. v68 is guarded by additional flags. 
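
Editor's note: the HexagonSubtarget.cpp hunk above works around ParseSubtargetFeatures by preprocessing the comma-separated feature string and appending "+hvx-qfloat" when an HVX version of 68 or newer is requested and the user has not decided explicitly. The following is a minimal standalone sketch of that decision, not the patch's code; the helper name impliesQFloat and its handling of corner cases are assumptions made for illustration.

#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

// Sketch: scan a comma-separated feature string and decide whether
// "+hvx-qfloat" should be implied. An explicit +/-hvx-qfloat always wins;
// otherwise the last "+hvxvNN" entry decides.
static bool impliesQFloat(const std::string &FS) {
  std::vector<std::string> Feats;
  std::stringstream SS(FS);
  for (std::string F; std::getline(SS, F, ',');)
    Feats.push_back(F);

  for (const std::string &F : Feats)
    if (F == "+hvx-qfloat" || F == "-hvx-qfloat")
      return false; // user already chose

  for (auto It = Feats.rbegin(); It != Feats.rend(); ++It)
    if (It->rfind("+hvxv", 0) == 0)            // starts with "+hvxv"
      return std::atoi(It->c_str() + 5) >= 68; // version number follows

  return false;
}

For example, impliesQFloat("+hvx,+hvxv68") yields true, while impliesQFloat("+hvxv69,-hvx-qfloat") yields false because the user disabled it explicitly. The bare "+hvx" case, which the patch resolves via hasV68Ops(), is omitted from the sketch.
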
+ bool GreaterThanV68 = false; + if (useHVXV69Ops()) + GreaterThanV68 = true; + + // Support for deprecated qfloat/ieee codegen flags + if (!GreaterThanV68) { + if (EnableV68FloatCodeGen) + UseHVXFloatingPoint = true; + } else { + UseHVXFloatingPoint = true; + } + + if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint) + LLVM_DEBUG( + dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen..."); if (OverrideLongCalls.getPosition()) UseLongCalls = OverrideLongCalls; + UseBSBScheduling = hasV60Ops() && EnableBSBSched; + if (isTinyCore()) { // Tiny core has a single thread, so back-to-back scheduling is enabled by // default. @@ -117,10 +179,10 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { UseBSBScheduling = false; } - FeatureBitset Features = getFeatureBits(); + FeatureBitset FeatureBits = getFeatureBits(); if (HexagonDisableDuplex) - setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); - setFeatureBits(Hexagon_MC::completeHVXFeatures(Features)); + setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex)); + setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits)); return *this; } diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 8ca6018bd81c..e4f375440be1 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -56,6 +56,10 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool UseSmallData = false; bool UseUnsafeMath = false; bool UseZRegOps = false; + bool UseHVXIEEEFPOps = false; + bool UseHVXQFloatOps = false; + bool UseHVXFloatingPoint = false; + bool UseCabac = false; bool HasPreV65 = false; bool HasMemNoShuf = false; @@ -188,6 +192,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool hasV68OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V68; } + bool hasV69Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V69; + } + bool hasV69OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V69; + } bool useAudioOps() const { return UseAudioOps; } bool useCompound() const { return UseCompound; } @@ -199,10 +209,16 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useSmallData() const { return UseSmallData; } bool useUnsafeMath() const { return UseUnsafeMath; } bool useZRegOps() const { return UseZRegOps; } + bool useCabac() const { return UseCabac; } bool isTinyCore() const { return HexagonProcFamily == TinyCore; } bool isTinyCoreWithDuplex() const { return isTinyCore() && EnableDuplex; } + bool useHVXIEEEFPOps() const { return UseHVXIEEEFPOps && useHVXOps(); } + bool useHVXQFloatOps() const { + return UseHVXQFloatOps && HexagonHVXVersion >= Hexagon::ArchEnum::V68; + } + bool useHVXFloatingPoint() const { return UseHVXFloatingPoint; } bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::NoArch; } @@ -224,6 +240,9 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useHVXV68Ops() const { return HexagonHVXVersion >= Hexagon::ArchEnum::V68; } + bool useHVXV69Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V69; + } bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } @@ -283,7 +302,11 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { } ArrayRef getHVXElementTypes() const { - static MVT Types[] = { MVT::i8, MVT::i16, MVT::i32 }; + static MVT Types[] = {MVT::i8, MVT::i16, 
MVT::i32}; + static MVT TypesV68[] = {MVT::i8, MVT::i16, MVT::i32, MVT::f16, MVT::f32}; + + if (useHVXV68Ops() && useHVXFloatingPoint()) + return makeArrayRef(TypesV68); return makeArrayRef(Types); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 4125566bc58a..c9a1781a4543 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -154,9 +154,8 @@ namespace HexagonII { PrefersSlot3Pos = 57, PrefersSlot3Mask = 0x1, - // v65 - HasTmpDstPos = 60, - HasTmpDstMask = 0x1, + HasHvxTmpPos = 60, + HasHvxTmpMask = 0x1, CVINewPos = 62, CVINewMask = 0x1, diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index fee1acdbbe8a..96c2965296ca 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -98,6 +98,10 @@ void HexagonMCChecker::init(MCInst const &MCI) { for (unsigned i = 0; i < MCID.getNumImplicitUses(); ++i) initReg(MCI, MCID.getImplicitUses()[i], PredReg, isTrue); + const bool IgnoreTmpDst = (HexagonMCInstrInfo::hasTmpDst(MCII, MCI) || + HexagonMCInstrInfo::hasHvxTmp(MCII, MCI)) && + STI.getFeatureBits()[Hexagon::ArchV69]; + // Get implicit register definitions. if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) for (; *ImpDef; ++ImpDef) { @@ -123,7 +127,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) // Include implicit late predicates. LatePreds.insert(R); - else + else if (!IgnoreTmpDst) Defs[R].insert(PredSense(PredReg, isTrue)); } @@ -178,7 +182,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) Uses.insert(*SRI); - else + else if (!IgnoreTmpDst) Defs[*SRI].insert(PredSense(PredReg, isTrue)); } } @@ -227,9 +231,11 @@ bool HexagonMCChecker::check(bool FullCheck) { bool chkAXOK = checkAXOK(); bool chkCofMax1 = checkCOFMax1(); bool chkHWLoop = checkHWLoop(); + bool chkValidTmpDst = FullCheck ? 
checkValidTmpDst() : true; bool chkLegalVecRegPair = checkLegalVecRegPair(); bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl && - chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair; + chkAXOK && chkCofMax1 && chkHWLoop && chkValidTmpDst && + chkLegalVecRegPair; return chk; } @@ -676,6 +682,32 @@ bool HexagonMCChecker::checkShuffle() { return MCSDX.check(); } +bool HexagonMCChecker::checkValidTmpDst() { + if (!STI.getFeatureBits()[Hexagon::ArchV69]) { + return true; + } + auto HasTmp = [&](MCInst const &I) { + return HexagonMCInstrInfo::hasTmpDst(MCII, I) || + HexagonMCInstrInfo::hasHvxTmp(MCII, I); + }; + unsigned HasTmpCount = + llvm::count_if(HexagonMCInstrInfo::bundleInstructions(MCII, MCB), HasTmp); + + if (HasTmpCount > 1) { + reportError( + MCB.getLoc(), + "this packet has more than one HVX vtmp/.tmp destination instruction"); + + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) + if (HasTmp(I)) + reportNote(I.getLoc(), + "this is an HVX vtmp/.tmp destination instruction"); + + return false; + } + return true; +} + void HexagonMCChecker::compoundRegisterMap(unsigned &Register) { switch (Register) { default: diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index 00afdb664ba5..dbd3d8ae45e6 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -99,6 +99,7 @@ class HexagonMCChecker { bool checkHWLoop(); bool checkCOFMax1(); bool checkLegalVecRegPair(); + bool checkValidTmpDst(); static void compoundRegisterMap(unsigned &); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index fa12fe1da448..68ccb20f4f15 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -939,10 +939,24 @@ bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, return (F >> HexagonII::PrefersSlot3Pos) & HexagonII::PrefersSlot3Mask; } -/// return true if instruction has hasTmpDst attribute. 
bool HexagonMCInstrInfo::hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI) { + switch (MCI.getOpcode()) { + default: + return false; + case Hexagon::V6_vgathermh: + case Hexagon::V6_vgathermhq: + case Hexagon::V6_vgathermhw: + case Hexagon::V6_vgathermhwq: + case Hexagon::V6_vgathermw: + case Hexagon::V6_vgathermwq: + return true; + } + return false; +} + +bool HexagonMCInstrInfo::hasHvxTmp(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - return (F >> HexagonII::HasTmpDstPos) & HexagonII::HasTmpDstMask; + return (F >> HexagonII::HasHvxTmpPos) & HexagonII::HasHvxTmpMask; } bool HexagonMCInstrInfo::requiresSlot(MCSubtargetInfo const &STI, diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 7b3c079880f8..5c56db14798f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -41,7 +41,8 @@ class DuplexCandidate { namespace Hexagon { -class PacketIterator { +class PacketIterator : public std::iterator { MCInstrInfo const &MCII; MCInst::const_iterator BundleCurrent; MCInst::const_iterator BundleEnd; @@ -188,6 +189,7 @@ bool hasImmExt(MCInst const &MCI); bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI); bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI); bool hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI); +bool hasHvxTmp(MCInstrInfo const &MCII, MCInst const &MCI); unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb); int64_t minConstant(MCInst const &MCI, size_t Index); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index d832a756cb92..dfdddb50657c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -80,6 +80,8 @@ cl::opt MV67T("mv67t", cl::Hidden, cl::desc("Build for Hexagon V67T"), cl::init(false)); cl::opt MV68("mv68", cl::Hidden, cl::desc("Build for Hexagon V68"), cl::init(false)); +cl::opt MV69("mv69", cl::Hidden, cl::desc("Build for Hexagon V69"), + cl::init(false)); cl::opt EnableHVX("mhvx", @@ -91,6 +93,7 @@ cl::opt clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"), clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"), clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"), + clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"), // Sentinel for no value specified. clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. 
@@ -101,6 +104,11 @@ static cl::opt DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions")); +static cl::opt + EnableHvxIeeeFp("mhvx-ieee-fp", cl::Hidden, + cl::desc("Enable HVX IEEE floating point extensions")); +static cl::opt EnableHexagonCabac + ("mcabac", cl::desc("tbd"), cl::init(false)); static StringRef DefaultArch = "hexagonv60"; @@ -123,6 +131,8 @@ static StringRef HexagonGetArchVariant() { return "hexagonv67t"; if (MV68) return "hexagonv68"; + if (MV69) + return "hexagonv69"; return ""; } @@ -371,6 +381,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { case Hexagon::ArchEnum::V68: Result.push_back("+hvxv68"); break; + case Hexagon::ArchEnum::V69: + Result.push_back("+hvxv69"); + break; case Hexagon::ArchEnum::Generic:{ Result.push_back(StringSwitch(CPU) .Case("hexagonv60", "+hvxv60") @@ -379,13 +392,19 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { .Case("hexagonv66", "+hvxv66") .Case("hexagonv67", "+hvxv67") .Case("hexagonv67t", "+hvxv67") - .Case("hexagonv68", "+hvxv68")); + .Case("hexagonv68", "+hvxv68") + .Case("hexagonv69", "+hvxv69")); break; } case Hexagon::ArchEnum::NoArch: // Sentinel if -mhvx isn't specified break; } + if (EnableHvxIeeeFp) + Result.push_back("+hvx-ieee-fp"); + if (EnableHexagonCabac) + Result.push_back("+cabac"); + return join(Result.begin(), Result.end(), ","); } } @@ -422,8 +441,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // turns on hvxvNN, corresponding to the existing ArchVNN. FeatureBitset FB = S; unsigned CpuArch = ArchV5; - for (unsigned F : {ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, ArchV60, - ArchV55, ArchV5}) { + for (unsigned F : {ArchV69, ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, + ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -438,7 +457,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { } bool HasHvxVer = false; for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, - ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68}) { + ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, + ExtensionHVXV69}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -451,6 +471,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // HasHvxVer is false, and UseHvx is true. 
switch (CpuArch) { + case ArchV69: + FB.set(ExtensionHVXV69); + LLVM_FALLTHROUGH; case ArchV68: FB.set(ExtensionHVXV68); LLVM_FALLTHROUGH; @@ -538,6 +561,7 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { {"hexagonv67", ELF::EF_HEXAGON_MACH_V67}, {"hexagonv67t", ELF::EF_HEXAGON_MACH_V67T}, {"hexagonv68", ELF::EF_HEXAGON_MACH_V68}, + {"hexagonv69", ELF::EF_HEXAGON_MACH_V69}, }; auto F = ElfFlags.find(STI.getCPU()); diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp index 203e05dde7ad..419f0ac1a8a7 100644 --- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp +++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp @@ -479,14 +479,12 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, // remove the use-soft-float attribute static void removeUseSoftFloat(Function &F) { - AttrBuilder B; LLVM_DEBUG(errs() << "removing -use-soft-float\n"); - B.addAttribute("use-soft-float", "false"); - F.removeFnAttrs(B); + F.removeFnAttr("use-soft-float"); if (F.hasFnAttribute("use-soft-float")) { LLVM_DEBUG(errs() << "still has -use-soft-float\n"); } - F.addFnAttrs(B); + F.addFnAttr("use-soft-float", "false"); } // This pass only makes sense when the underlying chip has floating point but diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6ee6a9d6525..8d6edf07bc53 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. - if (CC == ISD::SETEQ || CC == ISD::SETNE) { - return DAG.getNode( - ISD::BITCAST, dl, MVT::v2i64, - DAG.getSetCC(dl, MVT::v4i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC)); - } - - return SDValue(); + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return SDValue(); + SDValue SetCC32 = DAG.getSetCC( + dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC); + int ShuffV[] = {1, 0, 3, 2}; + SDValue Shuff = + DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV); + return DAG.getBitcast( + MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32)); } // We handle most of these in the usual way. 
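
Editor's note: in the PPCISelLowering.cpp hunk above, a v2i64 equality compare is still performed as a v4i32 compare, but the result is now ANDed with a copy of itself whose 32-bit elements are swapped within each 64-bit pair (the {1, 0, 3, 2} shuffle), so each 64-bit lane is all-ones only when both of its halves matched. The standalone scalar sketch below is illustrative only and shows why that combination gives the correct per-lane answer.

#include <cassert>
#include <cstdint>

// A 64-bit lane is equal exactly when both of its 32-bit halves are equal.
// "Lo" and "Hi" model the per-element masks a v4i32 setcc would produce;
// the shuffle+AND in the patch collapses to "Lo & Hi" for each lane.
static bool eq64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = uint32_t(A), AHi = uint32_t(A >> 32);
  uint32_t BLo = uint32_t(B), BHi = uint32_t(B >> 32);
  uint32_t Lo = (ALo == BLo) ? ~0u : 0u;
  uint32_t Hi = (AHi == BHi) ? ~0u : 0u;
  return (Lo & Hi) == ~0u;
}

int main() {
  assert(eq64ViaHalves(0x1234567800000000ull, 0x1234567800000000ull));
  assert(!eq64ViaHalves(0x1234567800000000ull, 0x1234567800000001ull));
  assert(!eq64ViaHalves(0x0000000012345678ull, 0xFFFFFFFF12345678ull));
  return 0;
}

Without the swap-and-AND, a lane whose halves disagree (one half equal, the other not) would come out half ones and half zeros, which is not a valid v2i64 setcc result.
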
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 7f63827afbd6..0c7be96a0595 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -413,9 +413,9 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) - for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) - MadeChange |= runOnLoop(*L); + for (Loop *I : *LI) + for (Loop *L : depth_first(I)) + MadeChange |= runOnLoop(L); return MadeChange; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index c38dec08af90..ed28731b8ef2 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) { InstructionCost PPCTTIImpl::getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind) { - // Set the max cost if an MMA type is present (v256i1, v512i1). - if (isMMAType(U->getType())) - return InstructionCost::getMax(); - // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa(U) || isa(U) || isa(U)) @@ -1276,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } -bool PPCTTIImpl::areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const { +bool PPCTTIImpl::areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef &Types) const { // We need to ensure that argument promotion does not // attempt to promote pointers to MMA types (__vector_pair // and __vector_quad) since these types explicitly cannot be // passed as arguments. Both of these types are larger than // the 128-bit Altivec vectors and have a scalar size of 1 bit. 
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; - return llvm::none_of(Args, [](Argument *A) { - auto *EltTy = cast(A->getType())->getElementType(); - if (EltTy->isSized()) - return (EltTy->isIntOrIntVectorTy(1) && - EltTy->getPrimitiveSizeInBits() > 128); + return llvm::none_of(Types, [](Type *Ty) { + if (Ty->isSized()) + return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128; return false; }); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index ad926705a330..0af6f2a308d9 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -134,9 +134,8 @@ class PPCTTIImpl : public BasicTTIImplBase { bool UseMaskForCond = false, bool UseMaskForGaps = false); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const; + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const; bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const; InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f33965b50459..4f5512e6fb37 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -335,6 +335,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f16, Expand); @@ -405,6 +407,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f64, Expand); @@ -430,6 +434,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal); + setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom); setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } @@ -538,6 +547,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::VP_AND, VT, Custom); + setOperationAction(ISD::VP_OR, VT, Custom); + setOperationAction(ISD::VP_XOR, VT, Custom); + 
setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); @@ -822,6 +835,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Operations below are different for between masks and other vectors. if (VT.getVectorElementType() == MVT::i1) { + setOperationAction(ISD::VP_AND, VT, Custom); + setOperationAction(ISD::VP_OR, VT, Custom); + setOperationAction(ISD::VP_XOR, VT, Custom); setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::OR, VT, Custom); setOperationAction(ISD::XOR, VT, Custom); @@ -1166,7 +1182,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasStdExtZbb(); } -bool RISCVTargetLowering::hasAndNot(SDValue Y) const { +bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { EVT VT = Y.getValueType(); // FIXME: Support vectors once we have tests. @@ -3180,11 +3196,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_UREM: return lowerVPOp(Op, DAG, RISCVISD::UREM_VL); case ISD::VP_AND: - return lowerVPOp(Op, DAG, RISCVISD::AND_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL); case ISD::VP_OR: - return lowerVPOp(Op, DAG, RISCVISD::OR_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL); case ISD::VP_XOR: - return lowerVPOp(Op, DAG, RISCVISD::XOR_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL); case ISD::VP_ASHR: return lowerVPOp(Op, DAG, RISCVISD::SRA_VL); case ISD::VP_LSHR: @@ -4529,9 +4545,12 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { switch (Opcode) { default: llvm_unreachable("Unhandled reduction"); - case ISD::VECREDUCE_FADD: - return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); + case ISD::VECREDUCE_FADD: { + // Use positive zero if we can. It is cheaper to materialize. + SDValue Zero = + DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); + return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); + } case ISD::VECREDUCE_SEQ_FADD: return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), Op.getOperand(0)); @@ -5397,6 +5416,33 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, VPOp, DAG, Subtarget); } +SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, + unsigned MaskOpc, + unsigned VecOpc) const { + MVT VT = Op.getSimpleValueType(); + if (VT.getVectorElementType() != MVT::i1) + return lowerVPOp(Op, DAG, VecOpc); + + // It is safe to drop mask parameter as masked-off elements are undef. + SDValue Op1 = Op->getOperand(0); + SDValue Op2 = Op->getOperand(1); + SDValue VL = Op->getOperand(3); + + MVT ContainerVT = VT; + const bool IsFixed = VT.isFixedLengthVector(); + if (IsFixed) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); + } + + SDLoc DL(Op); + SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL); + if (!IsFixed) + return Val; + return convertFromScalableVector(VT, Val, DAG, Subtarget); +} + // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be // matched to a RVV indexed load. 
The RVV indexed load instructions only // support the "unsigned unscaled" addressing mode; indices are implicitly @@ -5727,11 +5773,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != TargetLowering::TypeSoftenFloat) { - // FIXME: Support strict FP. - if (IsStrict) - return; if (!isTypeLegal(Op0.getValueType())) return; + if (IsStrict) { + unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RTZ_RV64 + : RISCVISD::STRICT_FCVT_WU_RTZ_RV64; + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(Opc, DL, VTs, N->getOperand(0), Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + Results.push_back(Res.getValue(1)); + return; + } unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0); @@ -7058,7 +7110,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth))) return SDValue(N, 0); - return combineGREVI_GORCI(N, DCI.DAG); + return combineGREVI_GORCI(N, DAG); } case RISCVISD::GREVW: case RISCVISD::GORCW: { @@ -7067,7 +7119,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SimplifyDemandedLowBitsHelper(1, 5)) return SDValue(N, 0); - return combineGREVI_GORCI(N, DCI.DAG); + return combineGREVI_GORCI(N, DAG); } case RISCVISD::SHFL: case RISCVISD::UNSHFL: { @@ -7152,11 +7204,23 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during // type legalization. This is safe because fp_to_uint produces poison if // it overflows. - if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() && - N->getOperand(0).getOpcode() == ISD::FP_TO_UINT && - isTypeLegal(N->getOperand(0).getOperand(0).getValueType())) - return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, - N->getOperand(0).getOperand(0)); + if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { + SDValue Src = N->getOperand(0); + if (Src.getOpcode() == ISD::FP_TO_UINT && + isTypeLegal(Src.getOperand(0).getValueType())) + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, + Src.getOperand(0)); + if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && + isTypeLegal(Src.getOperand(1).getValueType())) { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, + Src.getOperand(0), Src.getOperand(1)); + DCI.CombineTo(N, Res); + DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(Src.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked. + } + } return SDValue(); case RISCVISD::SELECT_CC: { // Transform @@ -7717,6 +7781,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::BDECOMPRESSW: case RISCVISD::FCVT_W_RTZ_RV64: case RISCVISD::FCVT_WU_RTZ_RV64: + case RISCVISD::STRICT_FCVT_W_RTZ_RV64: + case RISCVISD::STRICT_FCVT_WU_RTZ_RV64: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. 
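
Editor's note: the ComputeNumSignBitsForTargetNode hunk above adds the new strict W-form converts to the nodes whose 64-bit result is really a sign-extended 32-bit value. The standalone check below is illustrative only; it demonstrates the property the comment calls conservatively correct: after a 32-to-64-bit sign extension, bit 31 and every bit above it hold the same value, i.e. at least 33 sign bits are known.

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t W : {INT32_MIN, -5, 0, 7, INT32_MAX}) {
    uint64_t X = uint64_t(int64_t(W)); // sign-extend 32 -> 64
    uint64_t Top33 = X >> 31;          // bits 63..31 of the extended value
    // All 33 bits agree: either all zeros or all ones.
    assert(Top33 == 0 || Top33 == 0x1FFFFFFFFull);
  }
  return 0;
}
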
@@ -9448,6 +9514,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FCVT_XU_RTZ) NODE_NAME_CASE(FCVT_W_RTZ_RV64) NODE_NAME_CASE(FCVT_WU_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_W_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_WU_RTZ_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) NODE_NAME_CASE(GREV) NODE_NAME_CASE(GREVW) @@ -9589,6 +9657,9 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': + // TODO: Support fixed vectors up to XLen for P extension? + if (VT.isVector()) + break; return std::make_pair(0U, &RISCV::GPRRegClass); case 'f': if (Subtarget.hasStdExtZfh() && VT == MVT::f16) @@ -9601,17 +9672,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, default: break; } - } else { - if (Constraint == "vr") { - for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, - &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { - if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) - return std::make_pair(0U, RC); - } - } else if (Constraint == "vm") { - if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) - return std::make_pair(0U, &RISCV::VMV0RegClass); + } else if (Constraint == "vr") { + for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, + &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { + if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) + return std::make_pair(0U, RC); } + } else if (Constraint == "vm") { + if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) + return std::make_pair(0U, &RISCV::VMV0RegClass); } // Clang will correctly decode the usage of register name aliases into their diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 4ccfcbc50f7c..48c5ce730933 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -282,6 +282,11 @@ enum NodeType : unsigned { // the value read before the modification and the new chain pointer. SWAP_CSR, + // FP to 32 bit int conversions for RV64. These are used to keep track of the + // result being sign extended to 64 bit. These saturate out of range inputs. + STRICT_FCVT_W_RTZ_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCVT_WU_RTZ_RV64, + // Memory opcodes start here. 
VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE, VSE_VL, @@ -315,7 +320,7 @@ class RISCVTargetLowering : public TargetLowering { bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; - bool hasAndNot(SDValue Y) const override; + bool hasAndNotCompare(SDValue Y) const override; bool shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, @@ -596,6 +601,8 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc, bool HasMask = true) const; SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const; + SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, + unsigned VecOpc) const; SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index f328972e75c5..d6c31c4804db 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -184,8 +184,8 @@ let Predicates = [HasStdExtD] in { /// Float conversion operations // f64 -> f32, f32 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so // are defined later. @@ -271,8 +271,8 @@ let Predicates = [HasStdExtD, IsRV32] in { def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>; // double->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; // Saturating double->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>; @@ -285,8 +285,8 @@ def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>; def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>; // [u]int->double. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; } // Predicates = [HasStdExtD, IsRV32] let Predicates = [HasStdExtD, IsRV64] in { @@ -301,20 +301,20 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>; // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. 
-def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; // [u]int32->fp -def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; // Saturating double->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>; def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>; // double->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; // double->int64 with current rounding mode. def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; @@ -325,6 +325,6 @@ def : Pat<(i64 (lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; // [u]int64->fp. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; } // Predicates = [HasStdExtD, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index b8f71d753614..bb45ed859442 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -19,9 +19,9 @@ def SDT_RISCVFMV_W_X_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; def SDT_RISCVFMV_X_ANYEXTW_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; -def STD_RISCVFCVT_W_RV64 +def SDT_RISCVFCVT_W_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>; -def STD_RISCVFCVT_X +def SDT_RISCVFCVT_X : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>; def riscv_fmv_w_x_rv64 @@ -29,13 +29,27 @@ def riscv_fmv_w_x_rv64 def riscv_fmv_x_anyextw_rv64 : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>; def riscv_fcvt_w_rtz_rv64 - : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_wu_rtz_rv64 - : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_x_rtz - : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_X_RTZ", SDT_RISCVFCVT_X>; def riscv_fcvt_xu_rtz - : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_XU_RTZ", SDT_RISCVFCVT_X>; + +def riscv_strict_fcvt_w_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; +def riscv_strict_fcvt_wu_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; + +def riscv_any_fcvt_w_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_w_rtz_rv64 node:$src), + (riscv_fcvt_w_rtz_rv64 
node:$src)]>; +def riscv_any_fcvt_wu_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_wu_rtz_rv64 node:$src), + (riscv_fcvt_wu_rtz_rv64 node:$src)]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -375,8 +389,8 @@ def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>; def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>; // float->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; // Saturating float->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; @@ -389,8 +403,8 @@ def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>; def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV32] let Predicates = [HasStdExtF, IsRV64] in { @@ -403,12 +417,12 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32), // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; // float->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; @@ -423,8 +437,8 @@ def : Pat<(i64 (lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 76e0ea6f093b..663e44813899 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -277,8 +277,8 @@ defm : StPat; /// Float conversion operations // f32 -> f16, f16 -> f32 -def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; -def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; +def : Pat<(any_fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; // Moves (no conversion) def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>; @@ -287,8 +287,8 @@ def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>; let Predicates = [HasStdExtZfh, IsRV32] in { // half->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; // Saturating float->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; @@ -301,20 +301,20 @@ def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>; def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV32] let Predicates = [HasStdExtZfh, IsRV64] in { // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; // half->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; @@ -329,17 +329,17 @@ def : Pat<(i64 (lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; // [u]int->fp. 
Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV64] let Predicates = [HasStdExtZfhmin, HasStdExtD] in { /// Float conversion operations // f64 -> f16, f16 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; /// Float arithmetic operations def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2), diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 016db9d4c26d..7353496f4684 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -185,7 +185,9 @@ class RISCVTTIImpl : public BasicTTIImplBase { } unsigned getMaxInterleaveFactor(unsigned VF) { - return ST->getMaxInterleaveFactor(); + // If the loop will not be vectorized, don't interleave the loop. + // Let regular unroll to unroll the loop. + return VF == 1 ? 1 : ST->getMaxInterleaveFactor(); } }; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6bae55695f3d..2ba0b97229cc 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1759,7 +1759,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm( // registers in a mmory expression, and though unaccessible via rip/eip. if (IsGlobalLV && (BaseReg || IndexReg)) { Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, - End, Size, Identifier, Decl)); + End, Size, Identifier, Decl, + FrontendSize)); return false; } // Otherwise, we set the base register to a non-zero value diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 6d20c6af5fd2..67b1244708a8 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -286,10 +286,9 @@ struct X86Operand final : public MCParsedAsmOperand { bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; } bool isMemPlaceholder(const MCInstrDesc &Desc) const override { - // Add more restrictions to avoid the use of global symbols. This helps - // with reducing the code size. - return !Desc.isRematerializable() && !Desc.isCall() && isMem() && - !Mem.BaseReg && !Mem.IndexReg; + // Only MS InlineAsm uses global variables with registers rather than + // rip/eip. 
+ return isMem() && !Mem.DefaultBaseReg && Mem.FrontendSize; } bool needAddressOf() const override { return AddressOf; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3c83beeb0ce5..6f6361b6757b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1091,12 +1091,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + if (VT == MVT::v2i64) continue; + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } - setOperationAction(ISD::ROTL, MVT::v4i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i16, Custom); - setOperationAction(ISD::ROTL, MVT::v16i8, Custom); - setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); @@ -1194,8 +1193,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + } // XOP can efficiently perform BITREVERSE with VPPERM. for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) @@ -1278,6 +1279,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + if (VT == MVT::v4i64) continue; + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } // These types need custom splitting if their input is a 128-bit vector. @@ -1286,10 +1290,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i32, Custom); - setOperationAction(ISD::ROTL, MVT::v16i16, Custom); - setOperationAction(ISD::ROTL, MVT::v32i8, Custom); - setOperationAction(ISD::SELECT, MVT::v4f64, Custom); setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8i32, Custom); @@ -1654,6 +1654,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use @@ -1668,18 +1670,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } - // With BWI, expanding (and promoting the shifts) is the better. 
- if (!Subtarget.useBWIRegs()) - setOperationAction(ISD::ROTL, MVT::v32i16, Custom); - - setOperationAction(ISD::ROTL, MVT::v64i8, Custom); - for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); @@ -29847,7 +29841,19 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt); } - assert(IsROTL && "Only ROTL supported"); + SDValue Z = DAG.getConstant(0, DL, VT); + + if (!IsROTL) { + // If the ISD::ROTR amount is constant, we're always better off converting to + // ISD::ROTL. + if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt})) + return DAG.getNode(ISD::ROTL, DL, VT, R, NegAmt); + + // XOP targets always prefer ISD::ROTL. + if (Subtarget.hasXOP()) + return DAG.getNode(ISD::ROTL, DL, VT, R, + DAG.getNode(ISD::SUB, DL, VT, Z, Amt)); + } // Split 256-bit integers on XOP/pre-AVX2 targets. if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2())) @@ -29857,6 +29863,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL. // XOP implicitly uses modulo rotation amounts. if (Subtarget.hasXOP()) { + assert(IsROTL && "Only ROTL expected"); assert(VT.is128BitVector() && "Only rotate 128-bit vectors!"); // Attempt to rotate by immediate. @@ -29878,27 +29885,18 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (VT.is512BitVector() && !Subtarget.useBWIRegs()) return splitVectorIntBinary(Op, DAG); - assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || - ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && - Subtarget.hasAVX2()) || - (VT == MVT::v32i16 && !Subtarget.useBWIRegs()) || - (VT == MVT::v64i8 && Subtarget.useBWIRegs())) && - "Only vXi32/vXi16/vXi8 vector rotates supported"); - - // Check for a hidden ISD::ROTR, splat + vXi8 lowering can handle both, but we - // currently hit infinite loops in legalization if we allow ISD::ROTR. - // FIXME: Infinite ROTL<->ROTR legalization in TargetLowering::expandROT. - SDValue HiddenROTRAmt; - if (Amt.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(Amt.getOperand(0).getNode())) - HiddenROTRAmt = Amt.getOperand(1); + assert( + (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || + ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && + Subtarget.hasAVX2()) || + ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) && + "Only vXi32/vXi16/vXi8 vector rotates supported"); MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits); MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2); SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT); - SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, - HiddenROTRAmt ? HiddenROTRAmt : Amt, AmtMask); + SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); // Attempt to fold as unpack(x,x) << zext(splat(y)): // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw. @@ -29906,7 +29904,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // TODO: Handle vXi16 cases. if (EltSizeInBits == 8 || EltSizeInBits == 32) { if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) { - unsigned ShiftX86Opc = HiddenROTRAmt ? X86ISD::VSRLI : X86ISD::VSHLI; + unsigned ShiftX86Opc = IsROTL ? 
X86ISD::VSHLI : X86ISD::VSRLI; SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); BaseRotAmt = DAG.getZExtOrTrunc(BaseRotAmt, DL, MVT::i32); @@ -29914,7 +29912,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, Subtarget, DAG); Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt, Subtarget, DAG); - return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt); + return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); } } @@ -29925,7 +29923,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, bool IsConstAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); MVT WideVT = MVT::getVectorVT(Subtarget.hasBWI() ? MVT::i16 : MVT::i32, NumElts); - unsigned ShiftOpc = HiddenROTRAmt ? ISD::SRL : ISD::SHL; + unsigned ShiftOpc = IsROTL ? ISD::SHL : ISD::SRL; // Attempt to fold as: // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw. @@ -29942,7 +29940,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, getTargetVShiftByConstNode(X86ISD::VSHLI, DL, WideVT, R, 8, DAG)); Amt = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, AmtMod); R = DAG.getNode(ShiftOpc, DL, WideVT, R, Amt); - if (!HiddenROTRAmt) + if (IsROTL) R = getTargetVShiftByConstNode(X86ISD::VSRLI, DL, WideVT, R, 8, DAG); return DAG.getNode(ISD::TRUNCATE, DL, VT, R); } @@ -29952,14 +29950,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))). if (IsConstAmt || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) { // See if we can perform this by unpacking to lo/hi vXi16. - SDValue Z = DAG.getConstant(0, DL, VT); SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z)); SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z)); SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo); SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi); - return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt); + return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); } assert((VT == MVT::v16i8 || VT == MVT::v32i8) && "Unsupported vXi8 type"); @@ -29982,15 +29979,15 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return DAG.getSelect(DL, SelVT, C, V0, V1); }; - // 'Hidden' ROTR is currently only profitable on AVX512 targets where we - // have VPTERNLOG. - unsigned ShiftLHS = ISD::SHL; - unsigned ShiftRHS = ISD::SRL; - if (HiddenROTRAmt && useVPTERNLOG(Subtarget, VT)) { - std::swap(ShiftLHS, ShiftRHS); - Amt = HiddenROTRAmt; + // ISD::ROTR is currently only profitable on AVX512 targets with VPTERNLOG. + if (!IsROTL && !useVPTERNLOG(Subtarget, VT)) { + Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt); + IsROTL = true; } + unsigned ShiftLHS = IsROTL ? ISD::SHL : ISD::SRL; + unsigned ShiftRHS = IsROTL ? ISD::SRL : ISD::SHL; + // Turn 'a' into a mask suitable for VSELECT: a = a << 5; // We can safely do this using i16 shifts as we're only interested in // the 3 lower bits of each byte. @@ -30027,9 +30024,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return SignBitSelect(VT, Amt, M, R); } - // ISD::ROT* uses modulo rotate amounts. 
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); - bool IsSplatAmt = DAG.isSplatValue(Amt); bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) && @@ -30038,13 +30032,25 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // Fallback for splats + all supported variable shifts. // Fallback for non-constants AVX2 vXi16 as well. if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) { + Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT); AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt); - SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt); - SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR); + SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt); + SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR); return DAG.getNode(ISD::OR, DL, VT, SHL, SRL); } + // Everything below assumes ISD::ROTL. + if (!IsROTL) { + Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt); + IsROTL = true; + } + + // ISD::ROT* uses modulo rotate amounts. + Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); + + assert(IsROTL && "Only ROTL supported"); + // As with shifts, attempt to convert the rotation amount to a multiplication // factor, fallback to general expansion. SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG); @@ -32994,11 +33000,6 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { unsigned Bits = Ty->getScalarSizeInBits(); - // 8-bit shifts are always expensive, but versions with a scalar amount aren't - // particularly cheaper than those without. - if (Bits == 8) - return false; - // XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts. // Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred. if (Subtarget.hasXOP() && @@ -41050,6 +41051,28 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( Op, DemandedBits, DemandedElts, DAG, Depth); } +bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op, + const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const { + unsigned NumElts = DemandedElts.getBitWidth(); + unsigned Opc = Op.getOpcode(); + + switch (Opc) { + case X86ISD::VBROADCAST: + case X86ISD::VBROADCAST_LOAD: + // TODO: Permit vXi64 types on 32-bit targets. + if (isTypeLegal(Op.getValueType().getVectorElementType())) { + UndefElts = APInt::getNullValue(NumElts); + return true; + } + return false; + } + + return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts, + Depth); +} + // Helper to peek through bitops/trunc/setcc to determine size of source vector. // Allows combineBitcastvxi1 to determine what size vector generated a . 
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 2ea3477c3820..d1d6e319f16b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1159,6 +1159,10 @@ namespace llvm { SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const override; + bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const override; + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 41f273a1d567..d8cd7311a0d5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5192,10 +5192,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, return (RealCallerBits & RealCalleeBits) == RealCalleeBits; } -bool X86TTIImpl::areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const { - if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) +bool X86TTIImpl::areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef &Types) const { + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; // If we get here, we know the target features match. If one function @@ -5210,13 +5210,8 @@ bool X86TTIImpl::areFunctionArgsABICompatible( // Consider the arguments compatible if they aren't vectors or aggregates. // FIXME: Look at the size of vectors. // FIXME: Look at the element types of aggregates to see if there are vectors. - // FIXME: The API of this function seems intended to allow arguments - // to be removed from the set, but the caller doesn't check if the set - // becomes empty so that may not work in practice. 
- return llvm::none_of(Args, [](Argument *A) { - auto *EltTy = cast(A->getType())->getElementType(); - return EltTy->isVectorTy() || EltTy->isAggregateType(); - }); + return llvm::none_of(Types, + [](Type *T) { return T->isVectorTy() || T->isAggregateType(); }); } X86TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index c53424ec0026..11e9cb09c7d5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -234,9 +234,8 @@ class X86TTIImpl : public BasicTTIImplBase { bool isFCmpOrdCheaperThanFCmpZero(Type *Ty); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const; + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Type) const; TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool prefersVectorizedAddressing() const; diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 93bb11433775..3a42a2cac928 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -835,14 +835,20 @@ bool ArgumentPromotionPass::areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl &ArgsToPromote, SmallPtrSetImpl &ByValArgsToTransform) { + // TODO: Check individual arguments so we can promote a subset? + SmallVector Types; + for (Argument *Arg : ArgsToPromote) + Types.push_back(Arg->getType()->getPointerElementType()); + for (Argument *Arg : ByValArgsToTransform) + Types.push_back(Arg->getParamByValType()); + for (const Use &U : F.uses()) { CallBase *CB = dyn_cast(U.getUser()); if (!CB) return false; const Function *Caller = CB->getCaller(); const Function *Callee = CB->getCalledFunction(); - if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || - !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform)) + if (!TTI.areTypesABICompatible(Caller, Callee, Types)) return false; } return true; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 416c2ee45079..b977821bcaa6 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -6414,31 +6414,36 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return indicatePessimisticFixpoint(); } + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + // Verify callee and caller agree on how the promoted argument would be // passed. - // TODO: The use of the ArgumentPromotion interface here is ugly, we need a - // specialized form of TargetTransformInfo::areFunctionArgsABICompatible - // which doesn't require the arguments ArgumentPromotion wanted to pass. 
Function &Fn = *getIRPosition().getAnchorScope(); - SmallPtrSet ArgsToPromote, Dummy; - ArgsToPromote.insert(getAssociatedArgument()); const auto *TTI = A.getInfoCache().getAnalysisResultForFunction(Fn); - if (!TTI || - !ArgumentPromotionPass::areFunctionArgsABICompatible( - Fn, *TTI, ArgsToPromote, Dummy) || - ArgsToPromote.empty()) { + if (!TTI) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Missing TTI for function " + << Fn.getName() << "\n"); + return indicatePessimisticFixpoint(); + } + + auto CallSiteCheck = [&](AbstractCallSite ACS) { + CallBase *CB = ACS.getInstruction(); + return TTI->areTypesABICompatible( + CB->getCaller(), CB->getCalledFunction(), ReplacementTypes); + }; + bool AllCallSitesKnown; + if (!A.checkForAllCallSites(CallSiteCheck, *this, true, + AllCallSitesKnown)) { LLVM_DEBUG( dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for " << Fn.getName() << "\n"); return indicatePessimisticFixpoint(); } - // Collect the types that will replace the privatizable type in the function - // signature. - SmallVector ReplacementTypes; - identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); - // Register a rewrite of the argument. Argument *Arg = getAssociatedArgument(); if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) { @@ -6558,7 +6563,6 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return false; }; - bool AllCallSitesKnown; if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true, AllCallSitesKnown)) return indicatePessimisticFixpoint(); diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 516d4006894c..321d4a19a585 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -710,22 +710,13 @@ determinePointerAccessAttrs(Argument *A, if (CB.doesNotAccessMemory()) continue; - Function *F = CB.getCalledFunction(); - if (!F) { - if (CB.onlyReadsMemory()) { - IsRead = true; - continue; - } - return Attribute::None; - } - - if (CB.isArgOperand(U) && UseIndex < F->arg_size() && - SCCNodes.count(F->getArg(UseIndex))) { - // This is an argument which is part of the speculative SCC. Note that - // only operands corresponding to formal arguments of the callee can - // participate in the speculation. - break; - } + if (Function *F = CB.getCalledFunction()) + if (CB.isArgOperand(U) && UseIndex < F->arg_size() && + SCCNodes.count(F->getArg(UseIndex))) + // This is an argument which is part of the speculative SCC. Note + // that only operands corresponding to formal arguments of the callee + // can participate in the speculation. + break; // The accessors used on call site here do the right thing for calls and // invokes with operand bundles. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 18eb245779bf..8df4a4529f47 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -157,7 +157,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, Amt = Builder.CreateAdd(Amt, Off); } - AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt); + AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt); New->setAlignment(AI.getAlign()); New->takeName(&AI); New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 286a947fc603..0dbfdba353c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -183,7 +183,8 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { if (C->getValue().getActiveBits() <= 64) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); + AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(), + nullptr, AI.getName()); New->setAlignment(AI.getAlign()); // Scan to the end of the allocation instructions, to skip over a block of @@ -199,21 +200,13 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; - Instruction *NewI = GetElementPtrInst::CreateInBounds( + Instruction *GEP = GetElementPtrInst::CreateInBounds( NewTy, New, Idx, New->getName() + ".sub"); - IC.InsertNewInstBefore(NewI, *It); - - // Gracefully handle allocas in other address spaces. - if (AI.getType()->getPointerAddressSpace() != - NewI->getType()->getPointerAddressSpace()) { - NewI = - CastInst::CreatePointerBitCastOrAddrSpaceCast(NewI, AI.getType()); - IC.InsertNewInstBefore(NewI, *It); - } + IC.InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original // allocation. 
- return IC.replaceInstUsesWith(AI, NewI); + return IC.replaceInstUsesWith(AI, GEP); } } diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 9eedd9fd151e..9f26b37bbc79 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1121,7 +1121,7 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { - NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack")); + NewF->removeFnAttr("split-stack"); CallInst::Create(DFSanVarargWrapperFn, IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", BB); diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 8c4523206070..dda1a2f08076 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -588,7 +588,7 @@ struct AllSwitchPaths { PrevBB = BB; } - if (TPath.isExitValueSet()) + if (TPath.isExitValueSet() && isSupported(TPath)) TPaths.push_back(TPath); } } @@ -683,6 +683,62 @@ struct AllSwitchPaths { return Res; } + /// The determinator BB should precede the switch-defining BB. + /// + /// Otherwise, it is possible that the state defined in the determinator block + /// defines the state for the next iteration of the loop, rather than for the + /// current one. + /// + /// Currently supported paths: + /// \code + /// < switch bb1 determ def > [ 42, determ ] + /// < switch_and_def bb1 determ > [ 42, determ ] + /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ] + /// \endcode + /// + /// Unsupported paths: + /// \code + /// < switch bb1 def determ > [ 43, determ ] + /// < switch_and_determ bb1 def > [ 43, switch_and_determ ] + /// \endcode + bool isSupported(const ThreadingPath &TPath) { + Instruction *SwitchCondI = dyn_cast(Switch->getCondition()); + assert(SwitchCondI); + if (!SwitchCondI) + return false; + + const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent(); + const BasicBlock *SwitchCondUseBB = Switch->getParent(); + const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB(); + + assert( + SwitchCondUseBB == TPath.getPath().front() && + "The first BB in a threading path should have the switch instruction"); + if (SwitchCondUseBB != TPath.getPath().front()) + return false; + + // Make DeterminatorBB the first element in Path. 
+ PathType Path = TPath.getPath(); + auto ItDet = std::find(Path.begin(), Path.end(), DeterminatorBB); + std::rotate(Path.begin(), ItDet, Path.end()); + + bool IsDetBBSeen = false; + bool IsDefBBSeen = false; + bool IsUseBBSeen = false; + for (BasicBlock *BB : Path) { + if (BB == DeterminatorBB) + IsDetBBSeen = true; + if (BB == SwitchCondDefBB) + IsDefBBSeen = true; + if (BB == SwitchCondUseBB) + IsUseBBSeen = true; + if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen) + return false; + } + + return true; + } + SwitchInst *Switch; BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index ee6c800e07a5..eadbb4293539 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -175,44 +175,6 @@ static cl::opt using OverlapIntervalsTy = std::map; using InstOverlapIntervalsTy = DenseMap; -/// If the value of this instruction and the memory it writes to is unused, may -/// we delete this instruction? -static bool isRemovable(Instruction *I) { - // Don't remove volatile/atomic stores. - if (StoreInst *SI = dyn_cast(I)) - return SI->isUnordered(); - - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Does not have LocForWrite"); - case Intrinsic::lifetime_end: - // Never remove dead lifetime_end's, e.g. because it is followed by a - // free. - return false; - case Intrinsic::init_trampoline: - // Always safe to remove init_trampoline. - return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memcpy_inline: - // Don't remove volatile memory intrinsics. - return !cast(II)->isVolatile(); - case Intrinsic::memcpy_element_unordered_atomic: - case Intrinsic::memmove_element_unordered_atomic: - case Intrinsic::memset_element_unordered_atomic: - case Intrinsic::masked_store: - return true; - } - } - - // note: only get here for calls with analyzable writes. - if (auto *CB = dyn_cast(I)) - return CB->use_empty() && CB->willReturn() && CB->doesNotThrow(); - - return false; -} - /// Returns true if the end of this instruction can be safely shortened in /// length. static bool isShortenableAtTheEnd(Instruction *I) { @@ -835,7 +797,7 @@ struct DSEState { auto *MD = dyn_cast_or_null(MA); if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit && - (getLocForWriteEx(&I) || isMemTerminatorInst(&I))) + (getLocForWrite(&I) || isMemTerminatorInst(&I))) MemDefs.push_back(MD); } } @@ -1022,20 +984,39 @@ struct DSEState { return I.first->second; } - Optional getLocForWriteEx(Instruction *I) const { + Optional getLocForWrite(Instruction *I) const { if (!I->mayWriteToMemory()) return None; + if (auto *CB = dyn_cast(I)) + return MemoryLocation::getForDest(CB, TLI); + + return MemoryLocation::getOrNone(I); + } + + /// Assuming this instruction has a dead analyzable write, can we delete + /// this instruction? + bool isRemovable(Instruction *I) { + assert(getLocForWrite(I) && "Must have analyzable write"); + + // Don't remove volatile/atomic stores. + if (StoreInst *SI = dyn_cast(I)) + return SI->isUnordered(); + if (auto *CB = dyn_cast(I)) { - // If the functions may write to memory we do not know about, bail out. - if (!CB->onlyAccessesArgMemory() && - !CB->onlyAccessesInaccessibleMemOrArgMem()) - return None; + // Don't remove volatile memory intrinsics. 
+ if (auto *MI = dyn_cast(CB)) + return !MI->isVolatile(); - return MemoryLocation::getForDest(CB, TLI); + // Never remove dead lifetime intrinsics, e.g. because they are followed + // by a free. + if (CB->isLifetimeStartOrEnd()) + return false; + + return CB->use_empty() && CB->willReturn() && CB->doesNotThrow(); } - return MemoryLocation::getOrNone(I); + return false; } /// Returns true if \p UseInst completely overwrites \p DefLoc @@ -1053,7 +1034,7 @@ struct DSEState { return false; int64_t InstWriteOffset, DepWriteOffset; - if (auto CC = getLocForWriteEx(UseInst)) + if (auto CC = getLocForWrite(UseInst)) return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset, DepWriteOffset) == OW_Complete; return false; @@ -1065,7 +1046,7 @@ struct DSEState { << *Def->getMemoryInst() << ") is at the end the function \n"); - auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst()); + auto MaybeLoc = getLocForWrite(Def->getMemoryInst()); if (!MaybeLoc) { LLVM_DEBUG(dbgs() << " ... could not get location for write.\n"); return false; @@ -1209,30 +1190,14 @@ struct DSEState { /// loop. In particular, this guarantees that it only references a single /// MemoryLocation during execution of the containing function. bool isGuaranteedLoopInvariant(const Value *Ptr) { - auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) { - Ptr = Ptr->stripPointerCasts(); - if (auto *I = dyn_cast(Ptr)) { - if (isa(Ptr)) - return true; - - if (isAllocLikeFn(I, &TLI)) - return true; - - return false; - } - return true; - }; - Ptr = Ptr->stripPointerCasts(); - if (auto *I = dyn_cast(Ptr)) { - if (I->getParent()->isEntryBlock()) - return true; - } - if (auto *GEP = dyn_cast(Ptr)) { - return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && - GEP->hasAllConstantIndices(); - } - return IsGuaranteedLoopInvariantBase(Ptr); + if (auto *GEP = dyn_cast(Ptr)) + if (GEP->hasAllConstantIndices()) + Ptr = GEP->getPointerOperand()->stripPointerCasts(); + + if (auto *I = dyn_cast(Ptr)) + return I->getParent()->isEntryBlock(); + return true; } // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess, @@ -1344,7 +1309,7 @@ struct DSEState { // If Current does not have an analyzable write location or is not // removable, skip it. - CurrentLoc = getLocForWriteEx(CurrentI); + CurrentLoc = getLocForWrite(CurrentI); if (!CurrentLoc || !isRemovable(CurrentI)) { CanOptimize = false; continue; @@ -1702,12 +1667,12 @@ struct DSEState { dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n"); for (MemoryDef *Def : llvm::reverse(MemDefs)) { - if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst())) + if (SkipStores.contains(Def)) continue; Instruction *DefI = Def->getMemoryInst(); - auto DefLoc = getLocForWriteEx(DefI); - if (!DefLoc) + auto DefLoc = getLocForWrite(DefI); + if (!DefLoc || !isRemovable(DefI)) continue; // NOTE: Currently eliminating writes at the end of a function is limited @@ -1734,13 +1699,19 @@ struct DSEState { /// \returns true if \p Def is a no-op store, either because it /// directly stores back a loaded value or stores zero to a calloced object. 
bool storeIsNoop(MemoryDef *Def, const Value *DefUO) { - StoreInst *Store = dyn_cast(Def->getMemoryInst()); - MemSetInst *MemSet = dyn_cast(Def->getMemoryInst()); + Instruction *DefI = Def->getMemoryInst(); + StoreInst *Store = dyn_cast(DefI); + MemSetInst *MemSet = dyn_cast(DefI); Constant *StoredConstant = nullptr; if (Store) StoredConstant = dyn_cast(Store->getOperand(0)); - if (MemSet) + else if (MemSet) StoredConstant = dyn_cast(MemSet->getValue()); + else + return false; + + if (!isRemovable(DefI)) + return false; if (StoredConstant && StoredConstant->isNullValue()) { auto *DefUOInst = dyn_cast(DefUO); @@ -1873,7 +1844,7 @@ struct DSEState { bool Changed = false; for (auto OI : IOL) { Instruction *DeadI = OI.first; - MemoryLocation Loc = *getLocForWriteEx(DeadI); + MemoryLocation Loc = *getLocForWrite(DeadI); assert(isRemovable(DeadI) && "Expect only removable instruction"); const Value *Ptr = Loc.Ptr->stripPointerCasts(); @@ -1896,9 +1867,14 @@ struct DSEState { LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the " "already existing value\n"); for (auto *Def : MemDefs) { - if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) || - !isRemovable(Def->getMemoryInst())) + if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def)) continue; + + Instruction *DefInst = Def->getMemoryInst(); + auto MaybeDefLoc = getLocForWrite(DefInst); + if (!MaybeDefLoc || !isRemovable(DefInst)) + continue; + MemoryDef *UpperDef; // To conserve compile-time, we avoid walking to the next clobbering def. // Instead, we just try to get the optimized access, if it exists. DSE @@ -1910,17 +1886,14 @@ struct DSEState { if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef)) continue; - Instruction *DefInst = Def->getMemoryInst(); Instruction *UpperInst = UpperDef->getMemoryInst(); - auto IsRedundantStore = [this, DefInst, - UpperInst](MemoryLocation UpperLoc) { + auto IsRedundantStore = [&]() { if (DefInst->isIdenticalTo(UpperInst)) return true; if (auto *MemSetI = dyn_cast(UpperInst)) { if (auto *SI = dyn_cast(DefInst)) { - auto MaybeDefLoc = getLocForWriteEx(DefInst); - if (!MaybeDefLoc) - return false; + // MemSetInst must have a write location. 
+ MemoryLocation UpperLoc = *getLocForWrite(UpperInst); int64_t InstWriteOffset = 0; int64_t DepWriteOffset = 0; auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc, @@ -1933,9 +1906,7 @@ struct DSEState { return false; }; - auto MaybeUpperLoc = getLocForWriteEx(UpperInst); - if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) || - isReadClobber(*MaybeUpperLoc, DefInst)) + if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst)) continue; LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst << '\n'); @@ -1966,7 +1937,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, MaybeKillingLoc = State.getLocForTerminator(KillingI).map( [](const std::pair &P) { return P.first; }); else - MaybeKillingLoc = State.getLocForWriteEx(KillingI); + MaybeKillingLoc = State.getLocForWrite(KillingI); if (!MaybeKillingLoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " @@ -2030,7 +2001,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, if (!DebugCounter::shouldExecute(MemorySSACounter)) continue; - MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI); + MemoryLocation DeadLoc = *State.getLocForWrite(DeadI); if (IsMemTerm) { const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr); @@ -2095,8 +2066,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, } // Check if the store is a no-op. - if (!Shortend && isRemovable(KillingI) && - State.storeIsNoop(KillingDef, KillingUndObj)) { + if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) { LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI << '\n'); State.deleteDeadInstruction(KillingI); diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 77d76609c926..57e36e5b9b90 100644 --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -224,8 +224,8 @@ bool LoopDataPrefetch::run() { bool MadeChange = false; for (Loop *I : *LI) - for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) - MadeChange |= runOnLoop(*L); + for (Loop *L : depth_first(I)) + MadeChange |= runOnLoop(L); return MadeChange; } diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 2d3490b2d29e..e12eca0ed287 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1359,16 +1359,6 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] = Attribute::InaccessibleMemOrArgMemOnly, Attribute::NoSync, Attribute::NoFree}; -// List of all parameter and return attributes which must be stripped when -// lowering from the abstract machine model. Note that we list attributes -// here which aren't valid as return attributes, that is okay. There are -// also some additional attributes with arguments which are handled -// explicitly and are not in this list. -static constexpr Attribute::AttrKind ParamAttrsToStrip[] = - {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly, - Attribute::NoAlias, Attribute::NoFree}; - - // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. 
static AttributeList legalizeCallAttributes(LLVMContext &Ctx, @@ -2650,24 +2640,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, return !Records.empty(); } -// Handles both return values and arguments for Functions and calls. -template -static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, - unsigned Index) { +// List of all parameter and return attributes which must be stripped when +// lowering from the abstract machine model. Note that we list attributes +// here which aren't valid as return attributes, that is okay. +static AttrBuilder getParamAndReturnAttributesToRemove() { AttrBuilder R; - AttributeSet AS = AH.getAttributes().getAttributes(Index); - if (AS.getDereferenceableBytes()) - R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable, - AS.getDereferenceableBytes())); - if (AS.getDereferenceableOrNullBytes()) - R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, - AS.getDereferenceableOrNullBytes())); - for (auto Attr : ParamAttrsToStrip) - if (AS.hasAttribute(Attr)) - R.addAttribute(Attr); - - if (!R.empty()) - AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R)); + R.addDereferenceableAttr(1); + R.addDereferenceableOrNullAttr(1); + R.addAttribute(Attribute::ReadNone); + R.addAttribute(Attribute::ReadOnly); + R.addAttribute(Attribute::WriteOnly); + R.addAttribute(Attribute::NoAlias); + R.addAttribute(Attribute::NoFree); + return R; } static void stripNonValidAttributesFromPrototype(Function &F) { @@ -2683,13 +2668,13 @@ static void stripNonValidAttributesFromPrototype(Function &F) { return; } + AttrBuilder R = getParamAndReturnAttributesToRemove(); for (Argument &A : F.args()) if (isa(A.getType())) - RemoveNonValidAttrAtIndex(Ctx, F, - A.getArgNo() + AttributeList::FirstArgIndex); + F.removeParamAttrs(A.getArgNo(), R); if (isa(F.getReturnType())) - RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); + F.removeRetAttrs(R); for (auto Attr : FnAttrsToStrip) F.removeFnAttr(Attr); @@ -2757,13 +2742,13 @@ static void stripNonValidDataFromBody(Function &F) { stripInvalidMetadataFromInstruction(I); + AttrBuilder R = getParamAndReturnAttributesToRemove(); if (auto *Call = dyn_cast(&I)) { for (int i = 0, e = Call->arg_size(); i != e; i++) if (isa(Call->getArgOperand(i)->getType())) - RemoveNonValidAttrAtIndex(Ctx, *Call, - i + AttributeList::FirstArgIndex); + Call->removeParamAttrs(i, R); if (isa(Call->getType())) - RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex); + Call->removeRetAttrs(R); } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ec95cd62ed26..4747f34fcc62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2963,7 +2963,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized // instruction could feed a poison value to the base address of the widen // load/store. - if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0) + if (State.MayGeneratePoisonRecipes.contains(RepRecipe)) Cloned->dropPoisonGeneratingFlags(); State.Builder.SetInsertPoint(Builder.GetInsertBlock(), @@ -8448,7 +8448,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { // Introduce the early-exit compare IV <= BTC to form header block mask. // This is used instead of IV < TC because TC may wrap, unlike BTC. 
- // Start by constructing the desired canonical IV. + // Start by constructing the desired canonical IV in the header block. VPValue *IV = nullptr; if (Legal->getPrimaryInduction()) IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); @@ -8795,13 +8795,17 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( return VPBB; } LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); - assert(VPBB->getSuccessors().empty() && - "VPBB has successors when handling predicated replication."); + + VPBlockBase *SingleSucc = VPBB->getSingleSuccessor(); + assert(SingleSucc && "VPBB must have a single successor when handling " + "predicated replication."); + VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); // Record predicated instructions for above packing optimizations. VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); + VPBlockUtils::connectBlocks(RegSucc, SingleSucc); return RegSucc; } @@ -9022,30 +9026,25 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - auto Plan = std::make_unique(); + // Create initial VPlan skeleton, with separate header and latch blocks. + VPBasicBlock *HeaderVPBB = new VPBasicBlock(); + VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); + auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); + auto Plan = std::make_unique(TopRegion); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); - VPBasicBlock *VPBB = nullptr; - VPBasicBlock *HeaderVPBB = nullptr; + VPBasicBlock *VPBB = HeaderVPBB; SmallVector InductionsToMove; for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - if (VPBB) - VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); - else { - auto *TopRegion = new VPRegionBlock("vector loop"); - TopRegion->setEntry(FirstVPBBForBB); - Plan->setEntry(TopRegion); - HeaderVPBB = FirstVPBBForBB; - } - VPBB = FirstVPBBForBB; + VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); // Introduce each ingredient into VPlan. @@ -9112,8 +9111,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( : ""); } } + + VPBlockUtils::insertBlockAfter(new VPBasicBlock(), VPBB); + VPBB = cast(VPBB->getSingleSuccessor()); } + // Fold the last, empty block into its predecessor. + VPBB = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB); + assert(VPBB && "expected to fold last (empty) block"); + // After here, VPBB should not be used. 
+ VPBB = nullptr; + assert(isa(Plan->getEntry()) && !Plan->getEntry()->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " @@ -9183,13 +9191,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock); VPBlockUtils::connectBlocks(SplitPred, SinkRegion); VPBlockUtils::connectBlocks(SinkRegion, SplitBlock); - if (VPBB == SplitPred) - VPBB = SplitBlock; } } - cast(Plan->getEntry())->setExit(VPBB); - VPlanTransforms::removeRedundantInductionCasts(*Plan); // Now that sink-after is done, move induction recipes for optimized truncates @@ -9198,7 +9202,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi()); // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start); + adjustRecipesForReductions(cast(TopRegion->getExit()), Plan, + RecipeBuilder, Range.Start); // Introduce a recipe to combine the incoming and previous values of a // first-order recurrence. @@ -9278,6 +9283,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( RSO.flush(); Plan->setName(PlanName); + // Fold Exit block into its predecessor if possible. + // TODO: Fold block earlier once all VPlan transforms properly maintain a + // VPBasicBlock as exit. + VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit()); + assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; } @@ -9523,7 +9533,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { // exact, etc.). The control flow has been linearized and the // instruction is no longer guarded by the predicate, which could make // the flag properties to no longer hold. - if (State.MayGeneratePoisonRecipes.count(this) > 0) + if (State.MayGeneratePoisonRecipes.contains(this)) VecOp->dropPoisonGeneratingFlags(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index dfe75105ac42..f4a1883e35d5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2352,18 +2352,23 @@ class VPBlockUtils { /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p - /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr - /// has more than one successor, its conditional bit is propagated to \p - /// NewBlock. \p NewBlock must have neither successors nor predecessors. + /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's + /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's + /// conditional bit is propagated to \p NewBlock. \p NewBlock must have + /// neither successors nor predecessors. static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { assert(NewBlock->getSuccessors().empty() && - "Can't insert new block with successors."); - // TODO: move successors from BlockPtr to NewBlock when this functionality - // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr - // already has successors. 
- BlockPtr->setOneSuccessor(NewBlock); - NewBlock->setPredecessors({BlockPtr}); + NewBlock->getPredecessors().empty() && + "Can't insert new block with predecessors or successors."); NewBlock->setParent(BlockPtr->getParent()); + SmallVector Succs(BlockPtr->successors()); + for (VPBlockBase *Succ : Succs) { + disconnectBlocks(BlockPtr, Succ); + connectBlocks(NewBlock, Succ); + } + NewBlock->setCondBit(BlockPtr->getCondBit()); + BlockPtr->setCondBit(nullptr); + connectBlocks(BlockPtr, NewBlock); } /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p @@ -2406,6 +2411,31 @@ class VPBlockUtils { To->removePredecessor(From); } + /// Try to merge \p Block into its single predecessor, if \p Block is a + /// VPBasicBlock and its predecessor has a single successor. Returns a pointer + /// to the predecessor \p Block was merged into or nullptr otherwise. + static VPBasicBlock *tryToMergeBlockIntoPredecessor(VPBlockBase *Block) { + auto *VPBB = dyn_cast(Block); + auto *PredVPBB = + dyn_cast_or_null(Block->getSinglePredecessor()); + if (!VPBB || !PredVPBB || PredVPBB->getNumSuccessors() != 1) + return nullptr; + + for (VPRecipeBase &R : make_early_inc_range(*VPBB)) + R.moveBefore(*PredVPBB, PredVPBB->end()); + VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); + auto *ParentRegion = cast(Block->getParent()); + if (ParentRegion->getExit() == Block) + ParentRegion->setExit(PredVPBB); + SmallVector Successors(Block->successors()); + for (auto *Succ : Successors) { + VPBlockUtils::disconnectBlocks(Block, Succ); + VPBlockUtils::connectBlocks(PredVPBB, Succ); + } + delete Block; + return PredVPBB; + } + /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge. static bool isBackEdge(const VPBlockBase *FromBlock, const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) { diff --git a/llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll b/llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll new file mode 100644 index 000000000000..4f9a30bc8841 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +%T1 = type { i32, float, <4 x i1> } + +define void @extract_1() { +; ALL-LABEL: 'extract_1' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = extractvalue { i32, i32 } undef, 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = extractvalue { i32, i32 } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = extractvalue { i32, i1 } undef, 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = extractvalue { i32, i1 } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = extractvalue { i32, float } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = extractvalue { i32, [42 x i42] } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = extractvalue { i32, <42 x i42> } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = extractvalue { i32, %T1 } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'extract_1' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = extractvalue { i32, i32 } undef, 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = extractvalue { i32, i32 } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = extractvalue { i32, i1 } undef, 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = extractvalue { i32, i1 } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = extractvalue { i32, float } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = extractvalue { i32, [42 x i42] } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = extractvalue { i32, <42 x i42> } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = extractvalue { i32, %T1 } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %r0 = extractvalue {i32, i32} undef, 0 + %r1 = extractvalue {i32, i32} undef, 1 + %r2 = extractvalue {i32, i1} undef, 0 + %r3 = extractvalue {i32, i1} undef, 1 + %r4 = extractvalue {i32, float} undef, 1 + %r5 = extractvalue {i32, [42 x i42]} undef, 1 + %r6 = extractvalue {i32, <42 x i42>} undef, 1 + %r7 = extractvalue {i32, %T1} undef, 1 + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll new file mode 100644 index 000000000000..01913a7204a6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | 
FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. + +define i32 @fcopysign(i32 %arg) { +; ALL-LABEL: 'fcopysign' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fcopysign' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.copysign.f32(float undef, float undef) + %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.copysign.f64(double undef, double undef) + %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x 
double> undef) + %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +define i32 @fsqrt(i32 %arg) { +; ALL-LABEL: 'fsqrt' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fsqrt' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.sqrt.f32(float undef) + %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) + + %F64 = call double @llvm.sqrt.f64(double undef) + %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) + + ret i32 undef +} + +declare float @llvm.copysign.f32(float, float) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) + +declare double @llvm.copysign.f64(double, double) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.copysign.v4f64(<4 x 
double>, <4 x double>) +declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) + +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll new file mode 100644 index 000000000000..d75c38bfb342 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +declare i64 @llvm.smax.i64(i64, i64) +declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.smax.i32(i32, i32) +declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.smax.i16(i16, i16) +declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.smax.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.smax.i8(i8, i8) +declare <2 x i8> @llvm.smax.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>) + +define i32 @smax(i32 %arg) { +; FAST-LABEL: 'smax' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'smax' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'smax' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call 
<16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} + +declare i64 @llvm.smin.i64(i64, i64) +declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.smin.i32(i32, i32) +declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.smin.i16(i16, i16) +declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.smin.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.smin.i8(i8, i8) +declare <2 x i8> @llvm.smin.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>) + +define i32 @smin(i32 %arg) { +; FAST-LABEL: 'smin' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x 
i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'smin' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = 
call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'smin' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> 
undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll new file mode 100644 index 000000000000..f42a1a0990c2 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +declare i64 @llvm.umax.i64(i64, i64) +declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.umax.i32(i32, i32) +declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.umax.i16(i16, i16) +declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.umax.i8(i8, i8) +declare <2 x i8> @llvm.umax.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>) + +define i32 @umax(i32 %arg) { +; FAST-LABEL: 'umax' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'umax' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'umax' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call 
<16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} + +declare i64 @llvm.umin.i64(i64, i64) +declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.umin.i32(i32, i32) +declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.umin.i16(i16, i16) +declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.umin.i8(i8, i8) +declare <2 x i8> @llvm.umin.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>) + +define i32 @umin(i32 %arg) { +; FAST-LABEL: 'umin' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x 
i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'umin' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = 
call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'umin' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> 
undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/cast.ll b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll new file mode 100644 index 000000000000..308dd4c40a10 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll @@ -0,0 +1,449 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. 
+ +define i32 @add(i32 %arg) { + ; -- Same size registers -- +; ALL-LABEL: 'add' +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = zext <4 x i1> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = sext <4 x i1> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = trunc <4 x i32> undef to <4 x i1> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D = zext <8 x i1> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %E = sext <8 x i1> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F = trunc <8 x i32> undef to <8 x i1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %H = trunc i32 undef to i1 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'add' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = zext <4 x i1> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = sext <4 x i1> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = trunc <4 x i32> undef to <4 x i1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D = zext <8 x i1> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %E = sext <8 x i1> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F = trunc <8 x i32> undef to <8 x i1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %H = trunc i32 undef to i1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %A = zext <4 x i1> undef to <4 x i32> + %B = sext <4 x i1> undef to <4 x i32> + %C = trunc <4 x i32> undef to <4 x i1> + + ; -- Different size registers -- + %D = zext <8 x i1> undef to <8 x i32> + %E = sext <8 x i1> undef to <8 x i32> + %F = trunc <8 x i32> undef to <8 x i1> + + ; -- scalars -- + %G = zext i1 undef to i32 + %H = trunc i32 undef to i1 + + ret i32 undef +} + +define i32 @zext_sext(<8 x i1> %in) { +; FAST-LABEL: 'zext_sext' +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for
instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'zext_sext' +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'zext_sext' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; FAST-SIZE-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'zext_sext' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; 
SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %Z = zext <8 x i1> %in to <8 x i32> + %S = sext <8 x i1> %in to <8 x i32> + + %A1 = zext <16 x i8> undef to <16 x i16> + %A2 = sext <16 x i8> undef to <16 x i16> + %A = sext <8 x i16> undef to <8 x i32> + %B = zext <8 x i16> undef to <8 x i32> + %C = sext <4 x i32> undef to <4 x i64> + + %C.v8i8.z = zext <8 x i8> undef to <8 x i32> + %C.v8i8.s = sext <8 x i8> undef to <8 x i32> + %C.v4i16.z = zext <4 x i16> undef to <4 x i64> + %C.v4i16.s = sext <4 x i16> undef to <4 x i64> + + %C.v4i8.z = zext <4 x i8> undef to <4 x i64> + %C.v4i8.s = sext <4 x i8> undef to <4 x i64> + + %D = zext <4 x i32> undef to <4 x i64> + + %D1 = zext <8 x i32> undef to <8 x i64> + + %D2 = sext <8 x i32> undef to <8 x i64> + + %D3 = zext <16 x i16> undef to <16 x i32> + %D4 = zext <16 x i8> undef to <16 x i32> + %D5 = zext <16 x i1> undef to <16 x i32> + + %E = trunc <4 x i64> undef to <4 x i32> + %F = trunc <8 x i32> undef to <8 x i16> + %F1 = trunc <16 x i16> undef to <16 x i8> + %F2 = trunc <8 x i32> undef to <8 x i8> + %F3 = trunc <4 x i64> undef to <4 x i8> + + %G = trunc <8 x i64> undef to <8 x i32> + %G1 = trunc <16 x i32> undef to <16 x i16> + %G2 = trunc <16 x i32> undef to <16 x i8> + ret i32 undef +} + +define i32 @masks8(<8 x i1> %in) { +; ALL-LABEL: 'masks8' +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'masks8' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %Z = zext <8 x i1> %in to <8 x i32> + %S = sext <8 x i1> %in to <8 x i32> + ret i32 undef +} + +define i32 @masks4(<4 x i1> %in) { +; ALL-LABEL: 'masks4' +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <4 x i1> %in to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <4 x i1> %in to <4 x 
i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'masks4' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <4 x i1> %in to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <4 x i1> %in to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %Z = zext <4 x i1> %in to <4 x i64> + %S = sext <4 x i1> %in to <4 x i64> + ret i32 undef +} + +define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { +; FAST-LABEL: 'sitofp4' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'sitofp4' +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'sitofp4' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'sitofp4' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = sitofp <4 x i1> %a to <4 x float> + %A2 = sitofp <4 x i1> %a to <4 x double> + %B1 = sitofp <4 x i8> %b to <4 x float> + %B2 = sitofp <4 x i8> %b to <4 x double> + %C1 = sitofp <4 x i16> %c to <4 x float> + %C2 = sitofp <4 x i16> %c to <4 x double> + %D1 = sitofp <4 x i32> %d to <4 x float> + %D2 = sitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { +; FAST-LABEL: 'sitofp8' +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'sitofp8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'sitofp8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'sitofp8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = sitofp <8 x i1> %a to <8 x float> + %B1 = sitofp <8 x i8> %b to <8 x float> + %C1 = sitofp <8 x i16> %c to <8 x float> + %D1 = sitofp <8 x i32> %d to <8 x float> + ret void +} + +define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { +; FAST-LABEL: 'uitofp4' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'uitofp4' +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'uitofp4' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; 
FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'uitofp4' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = uitofp <4 x i1> %a to <4 x float> + %A2 = uitofp <4 x i1> %a to <4 x double> + %B1 = uitofp <4 x i8> %b to <4 x float> + %B2 = uitofp <4 x i8> %b to <4 x double> + %C1 = uitofp <4 x i16> %c to <4 x float> + %C2 = uitofp <4 x i16> %c to <4 x double> + %D1 = uitofp <4 x i32> %d to <4 x float> + %D2 = uitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { +; FAST-LABEL: 'uitofp8' +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'uitofp8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'uitofp8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'uitofp8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x 
float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = uitofp <8 x i1> %a to <8 x float> + %B1 = uitofp <8 x i8> %b to <8 x float> + %C1 = uitofp <8 x i16> %c to <8 x float> + %D1 = uitofp <8 x i32> %d to <8 x float> + ret void +} + +define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) { +; ALL-LABEL: 'fp_conv' +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = fpext <4 x float> %c to <4 x double> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = fpext <8 x float> %a to <8 x double> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A3 = fptrunc <4 x double> undef to <4 x float> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'fp_conv' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = fpext <4 x float> %c to <4 x double> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = fpext <8 x float> %a to <8 x double> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A3 = fptrunc <4 x double> undef to <4 x float> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = fpext <4 x float> %c to <4 x double> + %A2 = fpext <8 x float> %a to <8 x double> + %A3 = fptrunc <4 x double> undef to <4 x float> + %A4 = fptrunc <8 x double> undef to <8 x float> + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/div.ll b/llvm/test/Analysis/CostModel/AMDGPU/div.ll new file mode 100644 index 000000000000..7114b81025be --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/div.ll @@ -0,0 +1,1145 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +define i32 @sdiv() { +; FAST-LABEL: 'sdiv' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for 
instruction: %V64i8 = sdiv <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, undef + %V2i64 = sdiv <2 x i64> undef, undef + %V4i64 = sdiv <4 x i64> undef, undef + %V8i64 = sdiv <8 x i64> undef, undef + + %I32 = sdiv i32 undef, undef + %V4i32 = sdiv <4 x i32> undef, undef + %V8i32 = sdiv <8 x i32> undef, undef + %V16i32 = sdiv <16 x i32> undef, undef + + %I16 = sdiv i16 undef, undef + %V8i16 = sdiv <8 x i16> undef, undef + %V16i16 = sdiv <16 x i16> undef, undef + %V32i16 = sdiv <32 x i16> undef, undef + + %I8 = sdiv i8 undef, undef + %V16i8 = sdiv <16 x i8> undef, undef + %V32i8 = sdiv <32 x i8> undef, undef + %V64i8 = sdiv <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @udiv() { +; FAST-LABEL: 'udiv' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> 
undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V8i32 = udiv <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, undef + %V2i64 = udiv <2 x i64> undef, undef + %V4i64 = udiv <4 x i64> undef, undef + %V8i64 = udiv <8 x i64> undef, undef + + %I32 = udiv i32 undef, undef + %V4i32 = udiv <4 x i32> undef, undef + %V8i32 = udiv <8 x i32> undef, undef + %V16i32 = udiv <16 x i32> undef, undef + + %I16 = udiv i16 undef, undef + %V8i16 = udiv <8 x i16> undef, undef + %V16i16 = udiv <16 x i16> undef, undef + %V32i16 = udiv <32 x i16> undef, undef + + %I8 = udiv i8 undef, undef + %V16i8 = udiv <16 x i8> undef, undef + %V32i8 = udiv <32 x i8> undef, undef + %V64i8 = udiv <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @sdiv_const() { +; FAST-LABEL: 'sdiv_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; 
+; SLOW-LABEL: 'sdiv_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret i32 undef +; + %I64 = sdiv i64 undef, 7 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 7 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 7 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 7 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_const() { +; FAST-LABEL: 'udiv_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for 
instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, 7 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 7 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 7 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 7 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_uniformconst() { +; FAST-LABEL: 'sdiv_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = 
sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_uniformconst' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv 
<4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, 7 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 7 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 7 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 7 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_uniformconst() { +; FAST-LABEL: 'udiv_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_uniformconst' +; SLOW-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 
undef, 7 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 7 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 7 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 7 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_constpow2() { +; FAST-LABEL: 'sdiv_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = 
sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, 16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_constpow2() { +; FAST-LABEL: 'udiv_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> 
undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> 
undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, 16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_uniformconstpow2() { +; FAST-LABEL: 'sdiv_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_uniformconstpow2' +; 
SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 
undef +; + %I64 = sdiv i64 undef, 16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_uniformconstpow2() { +; FAST-LABEL: 'udiv_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_uniformconstpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found 
an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, 16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_constnegpow2() { +; FAST-LABEL: 'sdiv_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, -16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, -16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, -16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, -16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_constnegpow2() { +; FAST-LABEL: 'udiv_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, -16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, -16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, -16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, -16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_uniformconstnegpow2() { +; FAST-LABEL: 'sdiv_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an 
estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, -16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, -16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, -16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, -16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_uniformconstnegpow2() { +; FAST-LABEL: 'udiv_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, 
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
+; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
+; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef,
+; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; SLOW-LABEL: 'udiv_uniformconstnegpow2'
+; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
+; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
+; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
+; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef,
+; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; ALL-SIZE-LABEL: 'udiv_uniformconstnegpow2'
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef,
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %I64 = udiv i64 undef, -16
+  %V2i64 = udiv <2 x i64> undef,
+  %V4i64 = udiv <4 x i64> undef,
+  %V8i64 = udiv <8 x i64> undef,
+
+  %I32 = udiv i32 undef, -16
+  %V4i32 = udiv <4 x i32> undef,
+  %V8i32 = udiv <8 x i32> undef,
+  %V16i32 = udiv <16 x i32> undef,
+
+  %I16 = udiv i16 undef, -16
+  %V8i16 = udiv <8 x i16> undef,
+  %V16i16 = udiv <16 x i16> undef,
+  %V32i16 = udiv <32 x i16> undef,
+
+  %I8 = udiv i8 undef, -16
+  %V16i8 = udiv <16 x i8> undef,
+  %V32i8 = udiv <32 x i8> undef,
+  %V64i8 = udiv <64 x i8> undef,
+
+  ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index 310598da87d7..fe5b57be5d3b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -575,5 +575,107 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ret void
 }
+define i32 @frem(i32 %arg) {
+; CIFASTF64-LABEL: 'frem'
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %F64 = frem double undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V2F64 = frem <2 x double> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V4F64 = frem <4 x double> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V8F64 = frem <8 x double> undef, undef
+; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; CISLOWF64-LABEL: 'frem'
+; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef
+; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 =
frem <4 x float> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = frem double undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = frem <2 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = frem <4 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %V8F64 = frem <8 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SIFASTF64-LABEL: 'frem' +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F64 = frem double undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2F64 = frem <2 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4F64 = frem <4 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %V8F64 = frem <8 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SISLOWF64-LABEL: 'frem' +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %F64 = frem double undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V2F64 = frem <2 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V4F64 = frem <4 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V8F64 = frem <8 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FP16-LABEL: 'frem' +; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = frem double undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost 
of 76 for instruction: %V2F64 = frem <2 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = frem <4 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %V8F64 = frem <8 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; CI-SIZE-LABEL: 'frem' +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = frem float undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = frem <4 x float> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = frem <8 x float> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = frem <16 x float> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = frem double undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V2F64 = frem <2 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4F64 = frem <4 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V8F64 = frem <8 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SI-SIZE-LABEL: 'frem' +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = frem float undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = frem <4 x float> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = frem <8 x float> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = frem <16 x float> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %F64 = frem double undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V2F64 = frem <2 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V4F64 = frem <4 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %V8F64 = frem <8 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FP16-SIZE-LABEL: 'frem' +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = frem float undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = frem <4 x float> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = frem <8 x float> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = frem <16 x float> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = frem double undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V2F64 = frem <2 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4F64 = frem <4 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V8F64 = frem <8 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = frem float undef, undef + %V4F32 = frem <4 x float> 
undef, undef + %V8F32 = frem <8 x float> undef, undef + %V16F32 = frem <16 x float> undef, undef + + %F64 = frem double undef, undef + %V2F64 = frem <2 x double> undef, undef + %V4F64 = frem <4 x double> undef, undef + %V8F64 = frem <8 x double> undef, undef + + ret i32 undef +} + attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" } attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll index bf33cdc63553..4125d60f7ecb 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll @@ -89,3 +89,39 @@ define amdgpu_kernel void @fneg_f16() { %v17f16 = fneg <17 x half> undef ret void } + +define i32 @fneg_idiom(i32 %arg) { +; CHECK-LABEL: 'fneg_idiom' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SIZE-LABEL: 'fneg_idiom' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float -0.0, undef + %V4F32 = fsub <4 x float> , undef + %V8F32 = fsub <8 x float> , undef + %V16F32 = fsub <16 x float> , undef + + %F64 = fsub double -0.0, undef + %V2F64 = fsub <2 x double> , undef + %V4F64 = fsub <4 x double> , undef + %V8F64 = fsub <8 x double> , undef + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll new file mode 100644 index 000000000000..3c24d6cdcd28 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; 
RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. + +define i32 @fptosi_double_i64(i32 %arg) { +; ALL-LABEL: 'fptosi_double_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_double_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptosi double undef to i64 + %V2I64 = fptosi <2 x double> undef to <2 x i64> + %V4I64 = fptosi <4 x double> undef to <4 x i64> + %V8I64 = fptosi <8 x double> undef to <8 x i64> + ret i32 undef +} + +define i32 @fptosi_double_i32(i32 %arg) { +; ALL-LABEL: 'fptosi_double_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_double_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x 
i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptosi double undef to i32 + %V2I32 = fptosi <2 x double> undef to <2 x i32> + %V4I32 = fptosi <4 x double> undef to <4 x i32> + %V8I32 = fptosi <8 x double> undef to <8 x i32> + ret i32 undef +} + +define i32 @fptosi_double_i16(i32 %arg) { +; FAST-LABEL: 'fptosi_double_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_double_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_double_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_double_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptosi double undef to i16 + %V2I16 = fptosi <2 x double> undef to <2 x i16> + %V4I16 = fptosi <4 x double> undef to <4 x i16> + %V8I16 = fptosi <8 x double> undef to <8 x i16> + ret i32 undef +} + +define i32 @fptosi_double_i8(i32 %arg) { +; FAST-LABEL: 'fptosi_double_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_double_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_double_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_double_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I8 = fptosi double undef to i8 + %V2I8 = fptosi <2 x double> undef to <2 x i8> + %V4I8 = fptosi <4 x double> undef to <4 x i8> + %V8I8 = fptosi <8 x double> undef to <8 x i8> + ret i32 undef +} + +define i32 @fptosi_float_i64(i32 %arg) { +; ALL-LABEL: 'fptosi_float_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_float_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = 
fptosi float undef to i64 + %V2I64 = fptosi <2 x float> undef to <2 x i64> + %V4I64 = fptosi <4 x float> undef to <4 x i64> + %V8I64 = fptosi <8 x float> undef to <8 x i64> + %V16I64 = fptosi <16 x float> undef to <16 x i64> + ret i32 undef +} + +define i32 @fptosi_float_i32(i32 %arg) { +; ALL-LABEL: 'fptosi_float_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_float_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptosi float undef to i32 + %V2I32 = fptosi <2 x float> undef to <2 x i32> + %V4I32 = fptosi <4 x float> undef to <4 x i32> + %V8I32 = fptosi <8 x float> undef to <8 x i32> + %V16I32 = fptosi <16 x float> undef to <16 x i32> + ret i32 undef +} + +define i32 @fptosi_float_i16(i32 %arg) { +; FAST-LABEL: 'fptosi_float_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_float_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_float_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %I16 = fptosi float undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_float_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptosi float undef to i16 + %V2I16 = fptosi <2 x float> undef to <2 x i16> + %V4I16 = fptosi <4 x float> undef to <4 x i16> + %V8I16 = fptosi <8 x float> undef to <8 x i16> + %V16I16 = fptosi <16 x float> undef to <16 x i16> + ret i32 undef +} + +define i32 @fptosi_float_i8(i32 %arg) { +; FAST-LABEL: 'fptosi_float_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_float_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_float_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 
= fptosi <8 x float> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_float_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I8 = fptosi float undef to i8 + %V2I8 = fptosi <2 x float> undef to <2 x i8> + %V4I8 = fptosi <4 x float> undef to <4 x i8> + %V8I8 = fptosi <8 x float> undef to <8 x i8> + %V16I8 = fptosi <16 x float> undef to <16 x i8> + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll new file mode 100644 index 000000000000..e2f75a9f302c --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. 
+ +define i32 @fptoui_double_i64(i32 %arg) { +; ALL-LABEL: 'fptoui_double_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_double_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptoui double undef to i64 + %V2I64 = fptoui <2 x double> undef to <2 x i64> + %V4I64 = fptoui <4 x double> undef to <4 x i64> + %V8I64 = fptoui <8 x double> undef to <8 x i64> + ret i32 undef +} + +define i32 @fptoui_double_i32(i32 %arg) { +; ALL-LABEL: 'fptoui_double_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_double_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptoui double undef to i32 + %V2I32 = fptoui <2 x double> undef to <2 x i32> + %V4I32 = fptoui <4 x double> undef to <4 x i32> + %V8I32 = fptoui <8 x double> undef to <8 x i32> + ret i32 undef +} + +define i32 @fptoui_double_i16(i32 %arg) { +; FAST-LABEL: 'fptoui_double_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 
'fptoui_double_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_double_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_double_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptoui double undef to i16 + %V2I16 = fptoui <2 x double> undef to <2 x i16> + %V4I16 = fptoui <4 x double> undef to <4 x i16> + %V8I16 = fptoui <8 x double> undef to <8 x i16> + ret i32 undef +} + +define i32 @fptoui_double_i8(i32 %arg) { +; FAST-LABEL: 'fptoui_double_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_double_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_double_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_double_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I8 = fptoui double undef to i8 + %V2I8 = fptoui <2 x double> undef to <2 x i8> + %V4I8 = fptoui <4 x double> undef to <4 x i8> + %V8I8 = fptoui <8 x double> undef to <8 x i8> + ret i32 undef +} + +define i32 @fptoui_float_i64(i32 %arg) { +; ALL-LABEL: 'fptoui_float_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_float_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptoui float undef to i64 + %V2I64 = fptoui <2 x float> undef to <2 x i64> + %V4I64 = fptoui <4 x float> undef to <4 x i64> + %V8I64 = fptoui <8 x float> undef to <8 x i64> + %V16I64 = fptoui <16 x float> undef to <16 x i64> + ret i32 undef +} + +define i32 @fptoui_float_i32(i32 %arg) { +; ALL-LABEL: 'fptoui_float_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_float_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptoui float undef to i32 + %V2I32 = fptoui <2 x float> undef to <2 x i32> + %V4I32 = fptoui <4 x float> undef to <4 x i32> + %V8I32 = fptoui <8 x float> undef to <8 x i32> + %V16I32 = fptoui <16 x float> undef to <16 x i32> + ret i32 undef +} + +define i32 @fptoui_float_i16(i32 %arg) { +; FAST-LABEL: 'fptoui_float_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_float_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_float_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_float_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptoui float undef to i16 + %V2I16 = fptoui <2 x float> undef to <2 x i16> + %V4I16 = fptoui <4 x float> undef to <4 x i16> + %V8I16 = fptoui <8 x float> undef to <8 x i16> + %V16I16 = fptoui <16 x float> undef to <16 x i16> + ret i32 undef +} + +define i32 @fptoui_float_i8(i32 %arg) { +; FAST-LABEL: 'fptoui_float_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_float_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_float_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_float_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i32 undef +; + %I8 = fptoui float undef to i8 + %V2I8 = fptoui <2 x float> undef to <2 x i8> + %V4I8 = fptoui <4 x float> undef to <4 x i8> + %V8I8 = fptoui <8 x float> undef to <8 x i8> + %V16I8 = fptoui <16 x float> undef to <16 x i8> + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fround.ll b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll new file mode 100644 index 000000000000..fec0bd737304 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. + +define i32 @ceil(i32 %arg) { +; FAST-LABEL: 'ceil' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'ceil' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; 
SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'ceil' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'ceil' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.ceil.f32(float undef) + %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) + + %F64 = call double @llvm.ceil.f64(double undef) + %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @floor(i32 %arg) { +; ALL-LABEL: 'floor' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %F32 = call float @llvm.floor.f32(float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'floor' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.floor.f32(float undef) + %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) + + %F64 = call double @llvm.floor.f64(double undef) + %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @nearbyint(i32 %arg) { +; ALL-LABEL: 'nearbyint' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef) +; 
ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'nearbyint' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.nearbyint.f32(float undef) + %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) + + %F64 = call double @llvm.nearbyint.f64(double undef) + %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @rint(i32 %arg) { +; FAST-LABEL: 'rint' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'rint' +; SLOW-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'rint' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'rint' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; 
+ %F32 = call float @llvm.rint.f32(float undef) + %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) + + %F64 = call double @llvm.rint.f64(double undef) + %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @trunc(i32 %arg) { +; FAST-LABEL: 'trunc' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'trunc' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'trunc' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%F64 = call double @llvm.trunc.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'trunc' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.trunc.f32(float undef) + %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) + + %F64 = call double @llvm.trunc.f64(double undef) + %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) + + ret i32 undef +} + +declare float @llvm.ceil.f32(float) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) + +declare double @llvm.ceil.f64(double) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) + +declare float @llvm.floor.f32(float) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare <16 x float> @llvm.floor.v16f32(<16 x float>) + +declare double @llvm.floor.f64(double) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <4 x double> @llvm.floor.v4f64(<4 x double>) +declare <8 x double> @llvm.floor.v8f64(<8 x double>) + +declare float @llvm.nearbyint.f32(float) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>) +declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>) + +declare double @llvm.nearbyint.f64(double) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) +declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>) + 
+declare float @llvm.rint.f32(float) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <8 x float> @llvm.rint.v8f32(<8 x float>) +declare <16 x float> @llvm.rint.v16f32(<16 x float>) + +declare double @llvm.rint.f64(double) +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <4 x double> @llvm.rint.v4f64(<4 x double>) +declare <8 x double> @llvm.rint.v8f64(<8 x double>) + +declare float @llvm.trunc.f32(float) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) +declare <16 x float> @llvm.trunc.v16f32(<16 x float>) + +declare double @llvm.trunc.f64(double) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/gep.ll b/llvm/test/Analysis/CostModel/AMDGPU/gep.ll new file mode 100644 index 000000000000..da60908299be --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/gep.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
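+
+; Note (descriptive comment, summarizing the generated checks below): all of
+; the plain, zero-index GEPs in @test_geps are modeled as free (cost 0); only
+; the two "giant" constant-index GEPs at the end are charged a cost of 1, for
+; both the default and the code-size cost kinds.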
+ +define void @test_geps() { +; ALL-LABEL: 'test_geps' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'test_geps' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %a0 = getelementptr inbounds i8, i8* undef, i32 0 + %a1 = getelementptr inbounds i16, i16* undef, i32 0 + %a2 = getelementptr inbounds i32, i32* undef, i32 0 + %a3 = getelementptr inbounds i64, i64* undef, i32 0 + + %a4 = getelementptr inbounds float, float* undef, i32 0 + %a5 = getelementptr inbounds double, double* undef, i32 0 + + %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 + %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0 + %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0 + %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0 + %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0 + %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0 + + ; Check that we handle outlandishly large GEPs properly. This is unlikely to + ; be a valid pointer, but LLVM still generates GEPs like this sometimes in + ; dead code. + ; This GEP has index INT64_MAX, which is cost 1. + %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807 + + ; This GEP index wraps around to -1, which is cost 0. + %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855 + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll index 1bdbb5b18920..e587240d8dbc 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll @@ -220,3 +220,562 @@ define amdgpu_kernel void @insertelement_i64(i32 %arg) { %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg ret void } + +define i32 @insert_double_poison(i32 %arg) { +; ALL-LABEL: 'insert_double_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement 
<8 x double> poison, double undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_double_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg + %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 + %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 + + %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg + %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 + %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 + + %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg + %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 + %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 + %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 + %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 + + ret i32 undef +} + +define i32 @insert_float_poison(i32 %arg) { +; ALL-LABEL: 'insert_float_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_float_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_3 = insertelement <16 x float> 
poison, float undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg + %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 + %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 + + %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg + %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 + %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 + + %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg + %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 + %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 + %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 + %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 + + %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg + %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 + %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 + %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 + %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 + + ret i32 undef +} + +define i32 @insert_i64_poison(i32 %arg) { +; ALL-LABEL: 'insert_i64_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i64_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x 
i64> poison, i64 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg + %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 + %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 + + %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg + %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 + %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 + + %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg + %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 + %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 + %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 + %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 + + ret i32 undef +} + +define i32 @insert_i32_poison(i32 %arg) { +; ALL-LABEL: 'insert_i32_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_a = 
insertelement <16 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i32_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg + %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 + %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 + + %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg + %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 + %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 + + %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg + %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 + %v8i32_3 = insertelement <8 x i32> 
poison, i32 undef, i32 3 + %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 + %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 + + %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg + %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 + %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 + %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 + %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 + + ret i32 undef +} + +define i32 @insert_i16_poison(i32 %arg) { +; CI-LABEL: 'insert_i16_poison' +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; CI-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: ret i32 undef +; +; GFX89-LABEL: 'insert_i16_poison' +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; GFX89-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; CI-SIZE-LABEL: 'insert_i16_poison' +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> 
poison, i16 undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GFX89-SIZE-LABEL: 'insert_i16_poison' +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 
= insertelement <4 x i16> poison, i16 undef, i32 3 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg + %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 + %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 + + %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg + %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 + %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 + + %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg + %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 + %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 + + %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg + %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 + %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 + %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 + %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 + + %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg + %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 + %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 + %v32i16_8 = insertelement <32 x i16> poison, i16 undef, 
i32 8 + %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 + %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 + %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 + %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 + + ret i32 undef +} + +define i32 @insert_i8_poison(i32 %arg) { +; ALL-LABEL: 'insert_i8_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i8_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 
undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg + %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 + %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 + + %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg + %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 + %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 + + %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg + %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 + %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 + + %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg + %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 + %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 + %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 + + %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg + %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 + %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 + %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 + %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 + %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 + %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 + + %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg + %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 + %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 + %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 + %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 + %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 + %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 + %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 + %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 + %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 + + ret i32 undef +} + +define i32 @insert_i1_poison(i32 %arg) { +; ALL-LABEL: 'insert_i1_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, 
i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; ALL-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i1_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+ %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+ %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
+ %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
+
+ %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+ %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
+ %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
+
+ %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+ %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
+ %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
+
+ %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+ %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
+ %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
+ %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
+
+ %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+ %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
+ %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
+ %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
+ %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
+ %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
+ %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
+
+ %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+ %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
+ %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
+ %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
+ %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
+ %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
+ %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
+ %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+ %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+ %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
+
+ ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 824b7e1ee93d..8fddcbefaf76 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -112,4 +112,404 @@ define amdgpu_kernel void @mul_i16() #0 {
 ret void
 }
+define i32 @mul_constpow2() {
+; SLOW16-LABEL: 'mul_constpow2'
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef,
+; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64>
undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_constpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_constpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost 
of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_constpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, 16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x 
i64> undef, + + %I32 = mul i32 undef, 16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, 16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul <32 x i16> undef, + + %I8 = mul i8 undef, 16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + +define i32 @mul_uniformconstpow2() { +; SLOW16-LABEL: 'mul_uniformconstpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_uniformconstpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; 
FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_uniformconstpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_uniformconstpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found 
an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, 16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x i64> undef, + + %I32 = mul i32 undef, 16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, 16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul <32 x i16> undef, + + %I8 = mul i8 undef, 16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + +define i32 @mul_constnegpow2() { +; SLOW16-LABEL: 'mul_constnegpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_constnegpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_constnegpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_constnegpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x 
i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, -16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x i64> undef, + + %I32 = mul i32 undef, -16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, -16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul <32 x i16> undef, + + %I8 = mul i8 undef, -16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + +define i32 @mul_uniformconstnegpow2() { +; SLOW16-LABEL: 'mul_uniformconstnegpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for 
instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_uniformconstnegpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_uniformconstnegpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef,
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef,
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef,
+; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; FAST16-SIZE-LABEL: 'mul_uniformconstnegpow2'
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef,
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+ %I64 = mul i64 undef, -16
+ %V2i64 = mul <2 x i64> undef,
+ %V4i64 = mul <4 x i64> undef,
+ %V8i64 = mul <8 x i64> undef,
+
+ %I32 = mul i32 undef, -16
+ %V4i32 = mul <4 x i32> undef,
+ %V8i32 = mul <8 x i32> undef,
+ %V16i32 = mul <16 x i32> undef,
+
+ %I16 = mul i16 undef, -16
+ %V8i16 = mul <8 x i16> undef,
+ %V16i16 = mul <16 x i16> undef,
+ %V32i16 = mul <32 x i16> undef,
+
+ %I8 = mul i8 undef, -16
+ %V16i8 = mul <16 x i8> undef,
+ %V32i8 = mul <32 x i8> undef,
+ %V64i8 = mul <64 x i8> undef,
+
+ ret i32 undef
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
new file mode 100644
index 000000000000..a2d38d2314bd
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
@@ -0,0 +1,1145 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s
+; END.
+
+define i32 @srem() {
+; FAST-LABEL: 'srem'
+; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, undef
+; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; SLOW-LABEL: 'srem'
+; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; SLOW-NEXT: Cost Model: Found an
estimated cost of 6 for instruction: %I16 = srem i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, undef + %V2i64 = srem <2 x i64> undef, undef + %V4i64 = srem <4 x i64> undef, undef + %V8i64 = srem <8 x i64> undef, undef + + %I32 = srem i32 undef, undef + %V4i32 = srem <4 x i32> undef, undef + %V8i32 = srem <8 x i32> undef, undef + %V16i32 = srem <16 x i32> undef, undef + + %I16 = srem i16 undef, undef + %V8i16 = srem <8 x i16> undef, undef + %V16i16 = srem <16 x i16> undef, undef + %V32i16 = srem <32 x i16> undef, undef + + %I8 = srem i8 undef, undef + %V16i8 = srem <16 x i8> undef, undef + %V32i8 = srem <32 x i8> undef, undef + %V64i8 = srem <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @urem() { +; FAST-LABEL: 'urem' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for 
instruction: %I64 = urem i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, undef + %V2i64 = urem <2 x i64> undef, undef + %V4i64 = urem <4 x i64> undef, undef + %V8i64 = urem <8 x i64> undef, undef + + %I32 = urem i32 undef, undef + %V4i32 = urem <4 x i32> undef, undef + %V8i32 = urem <8 x i32> undef, undef + %V16i32 = urem <16 x i32> undef, undef + + %I16 = urem i16 undef, undef + %V8i16 = urem <8 x i16> undef, undef + %V16i16 = urem <16 x i16> undef, undef + %V32i16 = urem <32 x i16> undef, undef + + %I8 = urem i8 undef, undef + %V16i8 = urem <16 x i8> undef, undef + %V32i8 = urem <32 x i8> undef, undef + %V64i8 = urem <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @srem_const() { +; FAST-LABEL: 'srem_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 
+; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 
7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 7 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 7 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 7 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 7 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_const() { +; FAST-LABEL: 'urem_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 
184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 7 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 7 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 7 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + 
%V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 7 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_uniformconst() { +; FAST-LABEL: 'srem_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_uniformconst' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> 
undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 7 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 7 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 7 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 7 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_uniformconst() { +; FAST-LABEL: 'urem_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; 
FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_uniformconst' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem 
i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 7 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 7 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 7 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 7 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_constpow2() { +; FAST-LABEL: 'srem_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: 
Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 16 + %V8i16 = srem <8 x i16> undef, 
+ %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_constpow2() { +; FAST-LABEL: 'urem_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for 
instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_uniformconstpow2() { +; FAST-LABEL: 'srem_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %I16 = srem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_uniformconstpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_uniformconstpow2() { +; FAST-LABEL: 'urem_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_uniformconstpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost 
Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = 
urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_constnegpow2() { +; FAST-LABEL: 'srem_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16 +; SLOW-NEXT: Cost Model: 
Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, -16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, -16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, -16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, -16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_constnegpow2() { +; FAST-LABEL: 'urem_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; 
FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = 
urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, -16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, -16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, -16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, -16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_uniformconstnegpow2() { +; FAST-LABEL: 'srem_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for 
instruction: %I64 = srem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, -16 + 
%V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, -16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, -16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, -16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_uniformconstnegpow2() { +; FAST-LABEL: 'urem_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an 
estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, -16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, -16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, -16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, -16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 2da155797fa9..13a6fe72c2f0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -12,14 +12,6 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" ; BIG-ENDIAN: unable to translate in big endian mode -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<28 x s32>) = G_CONCAT_VECTORS %{{[0-9]+}}:_(<4 x s32>), 
%{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>) (in function: odd_vector) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector -; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: -define void @odd_vector(<7 x i32>* %addr) { - %vec = load <7 x i32>, <7 x i32>* %addr - store <7 x i32> %vec, <7 x i32>* %addr - ret void -} ; Make sure we don't mess up metadata arguments. declare void @llvm.write_register.i64(metadata, i64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll index bee323ad69d4..6c2d5e6967ba 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll @@ -4,12 +4,13 @@ define i8 @v1s8_add(<1 x i8> %a0) { ; CHECK-LABEL: name: v1s8_add ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %res = bitcast <1 x i8> %a0 to i8 ret i8 %res } @@ -17,16 +18,17 @@ define i8 @v1s8_add(<1 x i8> %a0) { define i24 @test_v3i8(<3 x i8> %a) { ; CHECK-LABEL: name: test_v3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %res = bitcast <3 x i8> %a to i24 ret i24 %res } @@ -35,12 +37,13 @@ define i24 @test_v3i8(<3 x i8> %a) { define <1 x half> @test_v1s16(<1 x float> %x) { ; CHECK-LABEL: name: test_v1s16 ; CHECK: bb.1 
(%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) - ; CHECK: $h0 = COPY [[FPTRUNC]](s16) - ; CHECK: RET_ReallyLR implicit $h0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) + ; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 %tmp = fptrunc <1 x float> %x to <1 x half> ret <1 x half> %tmp } @@ -49,17 +52,16 @@ declare <3 x float> @bar(float) define void @test_return_v3f32() { ; CHECK-LABEL: name: test_return_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $s0 = COPY [[DEF]](s32) - ; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $s0 = COPY [[DEF]](s32) + ; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR %call = call <3 x float> @bar(float undef) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir index de94098e2e9c..cfce6fd39022 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -10,16 +10,17 @@ body: | liveins: $w0, $w1 ; CHECK-LABEL: name: s32 ; CHECK: liveins: $w0, $w1 - ; CHECK: %x:_(s32) = COPY $w0 - ; CHECK: %y:_(s32) = COPY $w1 - ; CHECK: [[SADDO:%[0-9]+]]:_(s32), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %saddsat:_(s32) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] - ; CHECK: $w0 = COPY %saddsat(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = 
COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SADDO:%[0-9]+]]:_(s32), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] + ; CHECK-NEXT: $w0 = COPY %saddsat(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 %y:_(s32) = COPY $w1 %saddsat:_(s32) = G_SADDSAT %x, %y @@ -35,16 +36,17 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: s64 ; CHECK: liveins: $x0, $x1 - ; CHECK: %x:_(s64) = COPY $x0 - ; CHECK: %y:_(s64) = COPY $x1 - ; CHECK: [[SADDO:%[0-9]+]]:_(s64), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %saddsat:_(s64) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] - ; CHECK: $x0 = COPY %saddsat(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SADDO:%[0-9]+]]:_(s64), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] + ; CHECK-NEXT: $x0 = COPY %saddsat(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 %y:_(s64) = COPY $x1 %saddsat:_(s64) = G_SADDSAT %x, %y @@ -61,22 +63,23 @@ body: | ; CHECK-LABEL: name: s16 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC 
[[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s16) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -96,22 +99,23 @@ body: | ; CHECK-LABEL: name: s1 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s1) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -131,22 +135,23 @@ body: | ; CHECK-LABEL: name: s3 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], 
[[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s3) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -166,23 +171,24 @@ body: | ; CHECK-LABEL: name: s36 ; CHECK: liveins: $x0, $x1 - ; CHECK: %copy_1:_(s64) = COPY $x0 - ; CHECK: %copy_2:_(s64) = COPY $x1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 - ; CHECK: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s64) = COPY $x0 + ; CHECK-NEXT: %copy_2:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT 
[[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 %x:_(s36) = G_TRUNC %copy_1(s64) %copy_2:_(s64) = COPY $x1 @@ -201,47 +207,47 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: s88 ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: %copy_1:_(s128) = COPY $q0 - ; CHECK: %copy_2:_(s128) = COPY $q1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) - ; CHECK: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[EXTRACT]], [[EXTRACT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64) - ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) - ; CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) - ; CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) - ; CHECK: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO2]], [[MV6]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: 
RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s128) = COPY $q0 + ; CHECK-NEXT: %copy_2:_(s128) = COPY $q1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[EXTRACT]], [[EXTRACT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO2]], [[MV6]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 %x:_(s88) = G_TRUNC %copy_1(s128) %copy_2:_(s128) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index 9b5bfc13d50b..bf3619c4210f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -10,11 +10,12 @@ body: | ; CHECK-LABEL: name: shuffle_v4i32 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 0, 0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 0, 0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 0, 0, 0) @@ -32,11 +33,12 @@ body: | ; CHECK-LABEL: name: shuffle_v2i64 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], shufflemask(0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], shufflemask(0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 %1:_(<2 x s64>) = COPY $q1 %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(0, 0) @@ -54,11 +56,12 @@ body: | ; CHECK-LABEL: name: shuffle_v2p0 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<2 x p0>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x p0>) = COPY $q0 %1:_(<2 x p0>) = COPY $q1 %2:_(<2 x p0>) = G_SHUFFLE_VECTOR %0(<2 x p0>), %1, shufflemask(0, 0) @@ -76,11 +79,12 @@ body: | ; CHECK-LABEL: name: shuffle_v16i8 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s8>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<16 x s8>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s8>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + ; CHECK-NEXT: $q0 = 
COPY [[SHUF]](<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<16 x s8>) = COPY $q0 %1:_(<16 x s8>) = COPY $q1 %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -98,11 +102,12 @@ body: | ; CHECK-LABEL: name: shuffle_v8i16 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s16>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<8 x s16>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s16>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<8 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = COPY $q1 %2:_(<8 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %1, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) @@ -120,13 +125,14 @@ body: | ; CHECK-LABEL: name: shuffle_1elt_mask ; CHECK: liveins: $d0, $d1 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; CHECK: $d0 = COPY [[COPY2]](s64) - ; CHECK: $d1 = COPY [[COPY3]](s64) - ; CHECK: RET_ReallyLR implicit $d0, implicit $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: $d0 = COPY [[COPY2]](s64) + ; CHECK-NEXT: $d1 = COPY [[COPY3]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 %0:_(s64) = COPY $d0 %1:_(s64) = COPY $d1 %3:_(s64) = G_SHUFFLE_VECTOR %0:_(s64), %1:_, shufflemask(0) @@ -146,18 +152,19 @@ body: | ; CHECK-LABEL: name: oversize_shuffle_v4i64 ; CHECK: liveins: $q0, $q1, $q2, $q3, $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 - ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY1]](<2 x s64>), [[COPY2]], shufflemask(1, 2) - ; CHECK: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2) - ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store (<2 x s64>), align 32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s64) - ; CHECK: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY1]](<2 x s64>), [[COPY2]], shufflemask(1, 2) + ; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2) + ; CHECK-NEXT: 
G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store (<2 x s64>), align 32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s64) + ; CHECK-NEXT: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) + ; CHECK-NEXT: RET_ReallyLR %3:_(<2 x s64>) = COPY $q0 %4:_(<2 x s64>) = COPY $q1 %0:_(<4 x s64>) = G_CONCAT_VECTORS %3(<2 x s64>), %4(<2 x s64>) @@ -180,26 +187,27 @@ body: | ; CHECK-LABEL: name: oversize_shuffle_v8i32_build_vector ; CHECK: liveins: $q0, $q1, $q2, $q3, $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 - ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C2]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<4 x s32>), [[C3]](s64) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32) - ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store (<4 x s32>), align 32) - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64) - ; CHECK: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store (<4 x s32>), align 32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) + ; 
CHECK-NEXT: RET_ReallyLR %3:_(<4 x s32>) = COPY $q0 %4:_(<4 x s32>) = COPY $q1 %0:_(<8 x s32>) = G_CONCAT_VECTORS %3(<4 x s32>), %4(<4 x s32>) @@ -229,42 +237,45 @@ body: | ; CHECK-LABEL: name: oversize_shuffle_v6i64 ; CHECK: liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) - ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; CHECK: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64) - ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64) - ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64) - ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) - ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) - ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) - ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 + ; CHECK-NEXT: 
[[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]](<2 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) + ; CHECK-NEXT: RET_ReallyLR %3:_(s64) = COPY $d0 %4:_(s64) = COPY $d1 %5:_(s64) = COPY $d2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir index b1ab6d4cee5e..10d365f993b2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -10,16 +10,17 @@ body: | liveins: $w0, $w1 ; CHECK-LABEL: name: s32 ; CHECK: liveins: $w0, $w1 - ; CHECK: %x:_(s32) = COPY $w0 - ; CHECK: %y:_(s32) = COPY $w1 - ; CHECK: [[SSUBO:%[0-9]+]]:_(s32), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; 
CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] - ; CHECK: $w0 = COPY %ssubsat(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SSUBO:%[0-9]+]]:_(s32), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: $w0 = COPY %ssubsat(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 %y:_(s32) = COPY $w1 %ssubsat:_(s32) = G_SSUBSAT %x, %y @@ -35,16 +36,17 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: s64 ; CHECK: liveins: $x0, $x1 - ; CHECK: %x:_(s64) = COPY $x0 - ; CHECK: %y:_(s64) = COPY $x1 - ; CHECK: [[SSUBO:%[0-9]+]]:_(s64), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] - ; CHECK: $x0 = COPY %ssubsat(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SSUBO:%[0-9]+]]:_(s64), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: $x0 = COPY %ssubsat(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 %y:_(s64) = COPY $x1 %ssubsat:_(s64) = G_SSUBSAT %x, %y @@ -61,22 +63,23 @@ body: | ; CHECK-LABEL: name: s16 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB 
[[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s16) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -96,22 +99,23 @@ body: | ; CHECK-LABEL: name: s1 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s1) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -131,22 +135,23 @@ body: | ; CHECK-LABEL: name: s3 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] 
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s3) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -166,23 +171,24 @@ body: | ; CHECK-LABEL: name: s36 ; CHECK: liveins: $x0, $x1 - ; CHECK: %copy_1:_(s64) = COPY $x0 - ; CHECK: %copy_2:_(s64) = COPY $x1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 - ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s64) = COPY $x0 + ; CHECK-NEXT: %copy_2:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK-NEXT: 
[[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 %x:_(s36) = G_TRUNC %copy_1(s64) %copy_2:_(s64) = COPY $x1 @@ -201,47 +207,47 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: s88 ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: %copy_1:_(s128) = COPY $q0 - ; CHECK: %copy_2:_(s128) = COPY $q1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) - ; CHECK: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[EXTRACT]], [[EXTRACT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBO]](s64) - ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) - ; CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) - ; CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s64), 
[[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO]], [[MV6]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s128) = COPY $q0 + ; CHECK-NEXT: %copy_2:_(s128) = COPY $q1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[EXTRACT]], [[EXTRACT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBO]](s64) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO]], [[MV6]] + ; 
CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 %x:_(s88) = G_TRUNC %copy_1(s128) %copy_2:_(s128) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll b/llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll new file mode 100644 index 000000000000..a2547782f6b2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s + +define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: test_combine_v8i16_to_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: raddhn v0.8b, v0.8h, v2.8h +; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h +; CHECK-NEXT: ret +entry: + %res = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %x, i32 8) + %res2 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %y, i32 8) + %shuffle = shufflevector <8 x i8> %res, <8 x i8> %res2, <16 x i32> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: test_combine_v4i32_to_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: raddhn v0.4h, v0.4s, v2.4s +; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s +; CHECK-NEXT: ret +entry: + %res = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %x, i32 16) + %res2 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %y, i32 16) + %shuffle = shufflevector <4 x i16> %res, <4 x i16> %res2, <8 x i32> + ret <8 x i16> %shuffle +} + +define <4 x i32> @test_combine_v2i64_to_v4i32(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: test_combine_v2i64_to_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: raddhn v0.2s, v0.2d, v2.2d +; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret +entry: + %res = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %x, i32 32) + %res2 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %y, i32 32) + %shuffle = shufflevector <2 x i32> %res, <2 x i32> %res2, <4 x i32> + ret <4 x i32> %shuffle +} + +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) diff --git a/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll b/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll new file mode 100644 index 000000000000..f09109c282a2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s + +; Test the (concat_vectors (X), (trunc(smin(smax(Y, -2^n), 2^n-1))) pattern. 
+ +define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) { +; CHECK-LABEL: test_combine_v8i16_to_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqxtn2 v0.16b, v1.8h +; CHECK-NEXT: ret +entry: + %min = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %y, <8 x i16> ) + %max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %min, <8 x i16> ) + %trunc = trunc <8 x i16> %max to <8 x i8> + %shuffle = shufflevector <8 x i8> %x, <8 x i8> %trunc, <16 x i32> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) { +; CHECK-LABEL: test_combine_v4i32_to_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NEXT: ret +entry: + %max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> ) + %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %max, <4 x i32> ) + %trunc = trunc <4 x i32> %min to <4 x i16> + %shuffle = shufflevector <4 x i16> %x, <4 x i16> %trunc, <8 x i32> + ret <8 x i16> %shuffle +} + +declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 0030e2fa94b0..b66ba95daf80 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -315,17 +315,12 @@ entry: define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-CVT-LABEL: stest_f16i16: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8 -; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8 -; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s +; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s +; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i16: @@ -1028,17 +1023,12 @@ entry: define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-LABEL: stest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8 -; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8 -; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s +; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s +; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i16_mm: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir index 
b7ec21b576af..336521199709 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -326,8 +326,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BITCAST]](s128) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 2a7db868d005..507961fcffe8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s --- name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir index 71eee774acbe..b0fb33b34061 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -74,17 +74,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), 
[[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s32>) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir index f7bf2b25e95b..2ec0cb5a66e1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -30,12 +30,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 - ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 - ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -62,12 +64,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 - ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 - ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll index 657184620fa6..a407de876bbf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -628,23 +628,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v6 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-NEXT: v_and_or_b32 v3, v3, v9, s4 -; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX9-NEXT: v_and_or_b32 v2, v4, v9, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX9-NEXT: v_and_or_b32 v3, v5, v9, s4 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v8 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v3, v4, v9, v3 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX9-NEXT: v_and_or_b32 v4, v5, v9, s4 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 -; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v4 -; GFX9-NEXT: v_pk_add_f16 v1, v3, v1 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-NEXT: v_pk_add_f16 v1, v4, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -657,18 +659,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 -; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v6 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v8 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v6 -; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs: @@ -680,23 +684,25 @@ define <3 x half> 
@test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v6 ; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-DENORM-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-DENORM-NEXT: v_and_or_b32 v3, v3, v9, s4 -; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX9-DENORM-NEXT: v_and_or_b32 v2, v4, v9, v2 -; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX9-DENORM-NEXT: v_and_or_b32 v3, v5, v9, s4 -; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v8 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v3, v4, v9, v3 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX9-DENORM-NEXT: v_and_or_b32 v4, v5, v9, s4 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 -; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v4 -; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v3, v1 -; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v4, v1 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; @@ -709,18 +715,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6 -; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v6 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4 ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v8 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v6 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_3xhalf_add_mul_rhs: @@ -734,23 +742,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-NEXT: v_and_or_b32 v3, v3, v8, s4 ; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6 ; GFX10-NEXT: v_and_or_b32 v2, v2, v8, v7 +; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX10-NEXT: v_and_or_b32 v2, v3, v8, s4 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; 
GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-NEXT: v_and_or_b32 v2, v4, v8, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6 +; GFX10-NEXT: v_pk_add_f16 v0, v2, v0 ; GFX10-NEXT: v_and_or_b32 v2, v5, v8, s4 -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v6 -; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_and_or_b32 v3, v4, v8, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_pk_add_f16 v1, v2, v1 -; GFX10-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs: @@ -758,23 +768,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v3, v3, v7, s4 -; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v5, v5, v7, s4 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v7, v6 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v7, v8 -; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v7, v9 -; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v7 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v8 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v3, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v5, v9, s4 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v3 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs: @@ -788,23 +800,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v3, v3, v8, s4 ; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6 ; GFX10-DENORM-NEXT: v_and_or_b32 v2, v2, v8, v7 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX10-DENORM-NEXT: v_and_or_b32 v2, 
v3, v8, s4 -; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 ; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-DENORM-NEXT: v_and_or_b32 v2, v4, v8, v2 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 ; GFX10-DENORM-NEXT: v_and_or_b32 v2, v5, v8, s4 -; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v5, 16, v6 -; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 -; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-DENORM-NEXT: v_and_or_b32 v3, v4, v8, v3 -; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v5 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v2, v1 -; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v3 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: @@ -812,23 +826,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v3, v3, v7, s4 -; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v5, v5, v7, s4 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v7, v6 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v7, v8 -; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v7, v9 -; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v7 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v8 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v3, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v5, v9, s4 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v3 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul <3 x half> %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 3dfd3ab5a15a..122b5e65df6e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -1,19 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; 
RUN: llc -global-isel -global-isel-abort=0 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s ; FIXME: Also test with a pre-gfx8 target. define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -21,14 +22,15 @@ define i1 @i1_func_void() #0 { define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -36,14 +38,15 @@ define zeroext i1 @i1_zeroext_func_void() #0 { define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -51,14 +54,15 @@ define signext i1 @i1_signext_func_void() #0 { define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -66,14 +70,15 @@ define i7 @i7_func_void() #0 { define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -81,14 +86,15 @@ define zeroext i7 @i7_zeroext_func_void() #0 { define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -96,14 +102,15 @@ define signext i7 @i7_signext_func_void() #0 { define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -111,14 +118,15 @@ define i8 @i8_func_void() #0 { define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -126,14 +134,15 @@ define zeroext i8 @i8_zeroext_func_void() #0 { define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: 
[[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -141,14 +150,15 @@ define signext i8 @i8_signext_func_void() #0 { define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -156,14 +166,15 @@ define i16 @i16_func_void() #0 { define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -171,14 +182,15 @@ define zeroext i16 @i16_zeroext_func_void() #0 { define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: 
(load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -186,14 +198,15 @@ define signext i16 @i16_signext_func_void() #0 { define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val } @@ -201,14 +214,15 @@ define half @f16_func_void() #0 { define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -216,14 +230,15 @@ define i24 @i24_func_void() #0 { define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -231,14 +246,15 @@ define zeroext i24 @i24_zeroext_func_void() #0 { define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -246,17 +262,18 @@ define signext i24 @i24_signext_func_void() #0 { define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val } @@ -264,19 +281,20 @@ define <2 x i24> @v2i24_func_void() #0 { define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val } @@ -284,13 +302,14 @@ define <3 x i24> @v3i24_func_void() #0 { define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = 
COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val } @@ -298,16 +317,17 @@ define i32 @i32_func_void() #0 { define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -315,16 +335,17 @@ define i48 @i48_func_void() #0 { define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -332,16 +353,17 @@ define signext i48 @i48_signext_func_void() #0 { define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) 
= G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -349,15 +371,16 @@ define zeroext i48 @i48_zeroext_func_void() #0 { define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val } @@ -365,17 +388,18 @@ define i64 @i64_func_void() #0 { define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: 
$sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -383,17 +407,18 @@ define i65 @i65_func_void() #0 { define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -401,17 +426,18 @@ define signext i65 @i65_signext_func_void() #0 { define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; 
CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -419,13 +445,14 @@ define zeroext i65 @i65_zeroext_func_void() #0 { define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val } @@ -433,15 +460,16 @@ define float @f32_func_void() #0 { define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val } @@ -449,17 +477,18 @@ define double @f64_func_void() #0 { define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: 
[[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val } @@ -467,15 +496,16 @@ define <2 x double> @v2f64_func_void() #0 { define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef ret <2 x i32> %val } @@ -483,16 +513,17 @@ define <2 x i32> @v2i32_func_void() #0 { define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK: $vgpr0 
= COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val } @@ -500,17 +531,18 @@ define <3 x i32> @v3i32_func_void() #0 { define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val } @@ -518,18 +550,19 @@ define <4 x i32> @v4i32_func_void() #0 { define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY 
[[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val } @@ -537,22 +570,23 @@ define <5 x i32> @v5i32_func_void() #0 { define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY 
[[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr ret <8 x i32> %val @@ -561,30 +595,31 @@ define <8 x i32> @v8i32_func_void() #0 { define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: 
$vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr ret <16 x i32> %val @@ -593,46 +628,47 @@ define <16 x i32> @v16i32_func_void() #0 { define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: 
$vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) 
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr ret <32 x i32> %val @@ -641,17 +677,18 @@ define <32 x i32> @v32i32_func_void() #0 { define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val } @@ -659,20 +696,21 @@ define <2 x i64> @v2i64_func_void() #0 { define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = 
COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr ret <3 x i64> %val @@ -681,22 +719,23 @@ define <3 x i64> @v3i64_func_void() #0 { define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr ret <4 x i64> %val @@ -705,24 +744,25 @@ define <4 x i64> @v4i64_func_void() #0 { define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 
= COPY [[UV9]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr ret <5 x i64> %val @@ -731,30 +771,31 @@ define <5 x i64> @v5i64_func_void() #0 { define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY 
[[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr ret <8 x i64> %val @@ -763,46 +804,47 @@ define <8 x i64> @v8i64_func_void() #0 { define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY 
[[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr ret <16 x i64> %val @@ -811,13 +853,14 @@ define <16 x i64> @v16i64_func_void() #0 { define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val } @@ -825,13 +868,14 @@ define <2 x i16> @v2i16_func_void() #0 { define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val } @@ -839,17 +883,19 @@ define <2 x half> @v2f16_func_void() #0 { define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD 
[[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val } @@ -857,15 +903,16 @@ define <3 x i16> @v3i16_func_void() #0 { define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val } @@ -873,15 +920,16 @@ define <4 x i16> @v4i16_func_void() #0 { define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: 
[[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val } @@ -889,19 +937,21 @@ define <4 x half> @v4f16_func_void() #0 { define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = 
COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr ret <5 x i16> %val @@ -910,18 +960,19 @@ define <5 x i16> @v5i16_func_void() #0 { define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr ret <8 x i16> %val @@ -930,22 +981,23 @@ define <8 x i16> @v8i16_func_void() #0 { define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY 
[[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr ret <16 x i16> %val @@ -954,62 +1006,63 @@ define <16 x i16> @v16i16_func_void() #0 { define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT 
[[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr ret <16 x i8> %val @@ -1018,19 +1071,20 @@ define <16 x i8> @v16i8_func_void() #0 { define <2 x i8> 
@v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val } @@ -1038,22 +1092,23 @@ define <2 x i8> @v2i8_func_void() #0 { define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = 
G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val } @@ -1061,26 +1116,27 @@ define <3 x i8> @v3i8_func_void() #0 { define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY 
[[ANYEXT6]](s32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr ret <4 x i8> %val @@ -1089,18 +1145,19 @@ define <4 x i8> @v4i8_func_void() #0 { define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef ret { i8, i32 } %val } @@ -1108,19 +1165,20 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: 
S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 @@ -1137,15 +1195,16 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i define <33 x i32> @v33i32_func_void() #0 { ; CHECK-LABEL: name: v33i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr ret <33 x i32> %val @@ -1154,22 +1213,23 @@ define <33 x i32> @v33i32_func_void() #0 { define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep ret <33 x i32> %val @@ -1178,21 +1238,22 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-LABEL: name: struct_v32i32_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr ret { <32 x i32>, i32 }%val @@ -1201,21 +1262,22 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-LABEL: name: struct_i32_v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr ret { i32, <32 x i32> }%val @@ -1225,28 +1287,29 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], 
[[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef %load2 = load volatile i32, i32 addrspace(3)* undef @@ -1263,29 +1326,30 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef %load2 = load volatile float, float addrspace(3)* undef @@ -1302,22 +1366,23 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), 
[[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 @@ -1333,46 +1398,47 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit 
$vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1380,46 +1446,47 @@ define i1022 @i1022_func_void() #0 { define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: 
i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1427,46 +1494,47 @@ define signext i1022 @i1022_signext_func_void() #0 { define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), 
[[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1476,32 +1544,33 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: 
[[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef ret %struct.with.ptrs %val } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll index 3d4acd79132d..e78d959f0af5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -14,9 +14,9 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 ; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 ; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 +; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 ; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 -; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 @@ -38,8 +38,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208 ; GCN-NEXT: s_waitcnt vmcnt(7) ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 ; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 @@ -48,6 +46,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 ; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 ; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: s_endpgm ; ; GFX10-LABEL: v_insert_v64i32_37: @@ -58,13 +58,13 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX10-NEXT: s_clause 0xf ; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] ; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 ; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 ; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 @@ -84,8 +84,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:192 ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208 -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] 
offset:32 @@ -94,6 +92,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir index d6d2a74cbcc2..9580520d4216 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -198,156 +198,3 @@ body: | S_ENDPGM 0, implicit %1, implicit %2 ... - ---- -name: extract_sgpr_s32_from_v3s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_s32_from_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s32) = G_EXTRACT %0, 0 - %2:sgpr(s32) = G_EXTRACT %0, 32 - %3:sgpr(s32) = G_EXTRACT %0, 64 - S_ENDPGM 0, implicit %0, implicit %2, implicit %3 - -... - ---- -name: extract_sgpr_v2s32_from_v3s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_v2s32_from_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(<2 x s32>) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - ---- -name: extract_sgpr_v3s32_from_v4s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3 - ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<3 x s32>) = G_EXTRACT %0, 0 - %2:sgpr(<3 x s32>) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1, implicit %2 - -... - ---- -name: extract_sgpr_v2s16_from_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: extract_sgpr_v2s16_from_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v6s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s16) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index 4b6628472c8b..7d8e42e39fac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -32,8 +32,7 @@ body: | %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 - %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) %15:sgpr(p1) = G_INTTOPTR %13(s64) %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FCONSTANT double -0.000000e+00 @@ -82,8 +81,7 @@ body: | %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 - %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) %15:sgpr(p1) = G_INTTOPTR %13(s64) %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FABS %18 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index 10b9280837a7..57dbe204ecc9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -566,42 +566,3 @@ body: | %2:vgpr(s256) = G_INSERT %0, %1, 128 S_ENDPGM 0, implicit %2 ... - ---- -name: insert_sgpr_v2s16_to_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 - ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = COPY $sgpr2 - %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 - -... - ---- -name: insert_sgpr_v2s16_to_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = COPY $sgpr2 - %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 -... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir deleted file mode 100644 index 150b341561f9..000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir +++ /dev/null @@ -1,39 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s - -# ERR: remark: :0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0) -# ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0) -# ERR-NOT: remark - -# FIXME: This 16-bit insert source should not be legal and this test -# should be deleted ---- -name: insert_sgpr_s16_to_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s16) = G_TRUNC %1 - %3:sgpr(<4 x s16>) = G_INSERT %0, %2, 0 - S_ENDPGM 0, implicit %3 - -... - -# getSubRegFromChannel current does not handle cases > 128-bits ---- -name: insert_sgpr_v8s32_to_v16s32_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 - %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(<8 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 - %2:sgpr(<16 x s32>) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 - -... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 17d8f497cbcd..47461bf5e17d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -2078,11 +2078,11 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef @@ -2252,11 +2252,11 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; 
GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() store volatile <3 x half> %val, <3 x half> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index d81a40bfe6d0..419c8f920f48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -2101,11 +2101,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2161,11 +2162,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2339,12 +2341,13 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2400,13 +2403,14 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2462,44 
+2466,45 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), 
[[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY 
[[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) + ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>) ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2555,47 +2560,48 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x 
s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) + ; CHECK-NEXT: 
[[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: 
$vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) + ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>) ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 70a41f9dafaa..f29e6456d090 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1230,12 +1230,12 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef @@ -1267,12 +1267,12 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef @@ -1363,12 +1363,12 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), 
[[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), 
[[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef @@ -1680,12 +1680,12 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <3 x half> %arg0, <3 x half> addrspace(1)* undef diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index d6da4fca2198..7a0176d6233f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -1383,9 +1383,9 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -1396,11 +1396,12 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) + ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) + ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) + ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index a8ea15ced70a..9acec3f56876 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_and_s32 @@ -9,9 +9,9 @@ body: 
| ; CHECK-LABEL: name: test_and_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_AND %0, %1 @@ -26,9 +26,9 @@ body: | ; CHECK-LABEL: name: test_and_s1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: S_NOP 0, implicit [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_CONSTANT i32 0 @@ -46,22 +46,22 @@ body: | ; CHECK-LABEL: name: test_and_v2s1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -80,26 +80,26 @@ body: | ; CHECK-LABEL: name: test_and_v3s1 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP3]] - ; CHECK: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP4]] - ; CHECK: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP2]], [[ICMP5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s1) + ; 
CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -118,9 +118,9 @@ body: | ; CHECK-LABEL: name: test_and_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_AND %0, %1 @@ -135,16 +135,16 @@ body: | ; CHECK-LABEL: name: test_and_s96 ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s96) = G_AND %0, %1 @@ -159,13 +159,13 @@ body: | ; CHECK-LABEL: name: test_and_128 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND 
[[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(s128) = G_AND %0, %1 @@ -180,9 +180,9 @@ body: | ; CHECK-LABEL: name: test_and_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -200,9 +200,9 @@ body: | ; CHECK-LABEL: name: test_and_s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -220,12 +220,12 @@ body: | ; CHECK-LABEL: name: test_and_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -243,9 +243,9 @@ body: | ; CHECK-LABEL: name: test_and_s24 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 @@ -263,9 +263,9 @@ body: | ; CHECK-LABEL: name: test_and_s48 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %0 @@ -283,9 +283,9 @@ body: | ; CHECK-LABEL: name: test_and_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_AND %0, %1 @@ -300,20 +300,16 @@ body: | ; CHECK-LABEL: name: test_and_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_AND %0, %1 @@ -328,13 +324,13 @@ body: | ; CHECK-LABEL: name: test_and_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV]], [[UV2]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV1]], [[UV3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = G_AND %0, %1 @@ -348,25 +344,22 @@ body: | ; CHECK-LABEL: name: test_and_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: 
[[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_AND %0, %1 @@ -383,13 +376,13 @@ body: | ; CHECK-LABEL: name: test_and_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_AND %0, %1 @@ -404,9 +397,9 @@ body: | ; CHECK-LABEL: name: test_and_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_AND %0, %1 @@ -420,41 +413,66 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_and_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: 
[[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -474,9 +492,9 @@ body: | ; CHECK-LABEL: name: test_and_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; 
CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<4 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_AND %0, %1 @@ -490,54 +508,102 @@ body: | ; CHECK-LABEL: name: test_and_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]] - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] - ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND11]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_AND %0, %1 @@ -552,34 +618,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC 
[[AND3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_AND %0, %1 @@ -594,15 +641,15 @@ body: | ; CHECK-LABEL: name: test_and_v4s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_AND %0, %1 @@ -617,19 +664,19 @@ body: | ; CHECK-LABEL: name: test_and_v8s8 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) %0:_(<8 x s8>) = G_IMPLICIT_DEF %1:_(<8 x s8>) = G_IMPLICIT_DEF %2:_(<8 x s8>) = G_AND %0, %1 @@ -644,29 +691,29 @@ body: | ; CHECK-LABEL: name: test_and_v16s8 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), 
[[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]] - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[UV57]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]] - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]] - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]] + ; CHECK-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[UV57]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) %0:_(<16 x s8>) = G_IMPLICIT_DEF %1:_(<16 x s8>) = G_IMPLICIT_DEF %2:_(<16 x s8>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir index ffbf3f55dfb2..c6f0a6dc349c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -114,10 +114,7 @@ body: | ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index 5c982bd92352..b1165aa5cd48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s --- name: test_ashr_s32_s32 @@ -532,43 +532,40 @@ body: | ; SI-LABEL: name: test_ashr_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_ashr_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32 ; GFX9PLUS: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9PLUS-NEXT: 
[[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -798,32 +795,33 @@ body: | ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9PLUS: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9PLUS-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9PLUS-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9PLUS-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9PLUS-NEXT: 
[[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 6188cb9b4e9e..c7c4e0734200 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -9,8 +9,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s32_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(s32) = COPY $vgpr0 %1:_(<2 x s16>) = G_BITCAST %0 $vgpr0 = COPY %1 @@ -24,8 +24,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = G_BITCAST %0 $vgpr0 = COPY %1 @@ -39,8 +39,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x 
s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s64) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -54,8 +54,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -69,8 +69,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<4 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -84,8 +84,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -99,8 +99,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s128_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](s128) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](s128) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -114,8 +114,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_s128 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](s128) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s128) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s128) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -129,8 +129,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_s64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s64) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -144,8 +144,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](s64) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY 
[[BITCAST]](<4 x s16>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -159,8 +159,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v8s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<8 x s16>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -174,8 +174,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s16_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<8 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -189,8 +189,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p0_to_p1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -204,8 +204,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p1_to_p0 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -219,8 +219,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p999_to_p0 ; CHECK: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p999) = COPY $vgpr0_vgpr1 %1:_(p0) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -234,8 +234,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p123_to_p999 ; CHECK: [[COPY:%[0-9]+]]:_(p123) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p123) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p123) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) %0:_(p123) = COPY $vgpr0_vgpr1 %1:_(p999) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -251,9 +251,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s64_to_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY]](<4 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<8 x s32>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST 
[[COPY]](<4 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<8 x s32>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s32>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -269,9 +270,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s32_to_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<8 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<8 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x s64>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<4 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -287,9 +289,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s64_to_v16s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<16 x s32>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<16 x s32>) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s32>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -305,9 +308,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s32_to_v8s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[COPY]](<16 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[COPY]](<16 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<8 x s64>) = G_BITCAST %0 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -321,8 +325,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v32s32_to_v16s64 ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:_(<16 x s64>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -336,8 +340,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s64_to_v32s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) %0:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:_(<32 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -351,12 +355,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_s24_to_v3s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s24) = G_TRUNC %0 %2:_(<3 x s8>) = G_BITCAST %1 @@ -372,11 +376,11 @@ body: | ; CHECK-LABEL: name: test_bitcast_s48_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s48) = G_TRUNC %0 %2:_(<3 x s16>) = G_BITCAST %1 @@ -392,28 +396,28 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s8_to_s24 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK: $vgpr0 = COPY [[OR2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s8>) = G_TRUNC %0 %2:_(s24) = G_BITCAST %1 @@ -429,19 +433,19 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s16_to_s48 ; CHECK: 
[[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(s48) = G_BITCAST %1 @@ -457,10 +461,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_s16_to_v2s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(<2 x s8>) = G_BITCAST %1 @@ -476,17 +480,17 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s8_to_s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 %2:_(s16) = G_BITCAST %1 @@ -503,32 +507,31 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_v4s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], 
[[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<4 x s8>) = G_BITCAST %0 %2:_(<4 x s8>) = G_ADD %1, %1 @@ -544,37 +547,36 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s8>) = G_TRUNC %0 %2:_(<4 x s8>) = G_ADD %1, %1 @@ -590,56 +592,55 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_v8s4 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: 
[[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; 
CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<8 x s4>) = G_BITCAST %0 %2:_(<8 x s4>) = G_ADD %1, %1 @@ -655,63 +656,62 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD 
[[TRUNC14]], [[TRUNC15]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND2]], [[C2]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ADD6]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C2]](s16) - ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ADD7]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) - ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) 
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND2]], [[C2]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ADD6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C2]](s16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ADD7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s4>) = G_TRUNC %0 %2:_(<8 x s4>) = G_ADD %1, %1 @@ -727,8 +727,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -742,42 +742,41 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = 
G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_BITCAST %0 %2:_(<4 x s16>) = G_ADD %1, %1 @@ -793,51 +792,51 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v8s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - 
; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC8]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x 
s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC8]](<8 x s8>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -853,34 +852,41 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; 
CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s8>) = G_TRUNC %0 %2:_(<2 x s32>) = G_BITCAST %1 @@ -895,44 +901,44 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_s64 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s8>) = G_TRUNC %0 %2:_(s64) = G_BITCAST %1 @@ -947,135 +953,135 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v16s4 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; 
CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C4]](s32) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C5]](s32) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C6]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C4]](s32) - ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) - ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: 
[[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C7]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C7]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C7]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C7]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C7]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C7]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C7]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C7]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C7]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C3]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], 
[[C7]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C7]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C7]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C3]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], [[C7]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C7]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] - ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C6]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C4]](s32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; 
CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C7]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C7]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C7]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C7]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C7]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C7]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C7]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C7]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C3]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C7]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C7]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C7]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C3]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], [[C7]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C7]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<16 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<16 x s4>) = G_BITCAST %0 %2:_(<16 x s4>) = G_ADD %1, %1 @@ -1092,78 +1098,93 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s4_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), 
[[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = 
G_OR [[AND8]], [[SHL7]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32) - ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32) - ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR6]](s32), [[OR13]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL7]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] + ; CHECK-NEXT: 
[[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR6]](s32), [[OR13]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<16 x s4>) = G_TRUNC %0 %2:_(<2 x s32>) = G_BITCAST %1 @@ -1178,38 +1199,38 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v8s8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR 
[[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -1225,70 +1246,70 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s32_to_v12s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: 
[[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[BUILD_VECTOR]](<12 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC12]](<12 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(<12 x s8>) = 
G_TRUNC [[BUILD_VECTOR]](<12 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC12]](<12 x s8>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<12 x s8>) = G_BITCAST %0 %2:_(<12 x s8>) = G_ADD %1, %1 @@ -1304,47 +1325,47 @@ body: | ; CHECK-LABEL: name: test_bitcast_v12s8_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], 
[[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -1362,55 +1383,54 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[UV7]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL5]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<6 x s8>) = G_TRUNC %0 %2:_(<6 x s8>) = G_ADD %1, %1 @@ -1427,46 +1447,45 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s16_to_v6s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], 
[[TRUNC5]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[ADD1]], [[C]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[ADD2]], [[C]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[ADD1]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[ADD2]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; 
CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s16>) = G_ADD %1, %1 @@ -1483,65 +1502,65 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v16s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] 
- ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; 
CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 %2:_(<16 x s8>) = G_ADD %1, %1 @@ -1557,79 +1576,94 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), 
[[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV24]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV25]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV26]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV27]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV28]](s32) - ; 
CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV29]](s32) - ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) - ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV30]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV31]](s32) - ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) - ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: 
[[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]] + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C]] + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s64>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s8>) = G_TRUNC %0 %2:_(<2 x s64>) = G_BITCAST %1 @@ -1643,88 +1677,88 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: test_bitcast_v4s32_to_v16s8 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: 
[[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], 
[[C1]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; 
CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 %2:_(<16 x s8>) = G_ADD %1, %1 @@ -1740,56 +1774,89 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), 
[[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV20]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV21]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV22]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV23]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[C]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[C]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV60]], [[C]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV61]], [[C]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV62]], [[C]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV63]], [[C]] - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), 
[[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = 
G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s8>) = G_TRUNC %0 %2:_(<4 x s32>) = G_BITCAST %1 @@ -1804,67 +1871,67 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s16_to_v16s8 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) 
= G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR6]], [[LSHR6]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR7]], [[LSHR7]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR6]], [[LSHR6]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR7]], [[LSHR7]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) 
= G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 %2:_(<16 x s8>) = G_ADD %1, %1 @@ -1880,87 +1947,150 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV18]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV19]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV36]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV37]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), 
[[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV54]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV55]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV72]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV73]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV90]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV91]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) - ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV108]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV109]](s32) - ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), 
[[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV126]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV127]](s32) - ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; 
CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; 
CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C]] + ; CHECK-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8) + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8) + ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]] + ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 
x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s8>) = G_TRUNC %0 %2:_(<8 x s16>) = G_BITCAST %1 @@ -1975,8 +2105,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v6s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<6 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -1990,8 +2120,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s32_to_v3s64 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<3 x s64>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -2005,8 +2135,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v12s16 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<12 x s16>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -2020,8 +2150,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v12s16_to_v3s64 ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit 
[[BITCAST]](<3 x s64>) %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<3 x s64>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -2035,117 +2165,117 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v24s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<3 x s64>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; CHECK: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C1]](s16) - ; CHECK: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C1]](s16) - ; CHECK: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C1]](s16) - ; CHECK: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C1]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR15]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; CHECK: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] - ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] - ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C2]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL8]] - ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] - ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C2]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL9]] - ; CHECK: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] - ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C2]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL10]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - 
; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] - ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C2]] - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND22]], [[SHL11]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<3 x s64>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = 
G_LSHR [[UV8]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; CHECK-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C1]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR15]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] + ; 
CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C2]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C2]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] + ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C2]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] + ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C2]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND22]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<24 x s8>) = G_BITCAST %0 %2:_(<24 x s16>) = G_ANYEXT %1 @@ -2160,139 +2290,137 @@ body: | ; CHECK-LABEL: name: test_bitcast_v24s8_to_v3s64 ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[COPY1:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; 
CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] - ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s16) 
= G_SHL [[AND11]], [[C2]](s16) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] - ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16) - ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C1]] - ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C1]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C2]](s16) - ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) - ; CHECK: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; CHECK: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>) - ; CHECK: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>) - ; CHECK: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C1]] - ; CHECK: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C1]] - ; CHECK: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C2]](s16) - ; CHECK: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; CHECK: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C1]] - ; CHECK: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C1]] - ; CHECK: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C2]](s16) - ; CHECK: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; CHECK: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C1]] - ; CHECK: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C1]] - ; CHECK: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C2]](s16) - ; CHECK: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; CHECK: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C1]] - ; CHECK: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C1]] - ; CHECK: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C2]](s16) - ; CHECK: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; CHECK: 
[[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CHECK: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] - ; CHECK: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CHECK: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C2]](s16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) 
= G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C1]] + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C2]](s16) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C1]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C1]] + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C2]](s16) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] + ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C1]] + ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C1]] + ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C2]](s16) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] + ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C1]] + ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C1]] + ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C2]](s16) + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) + ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) + ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 %2:_(<24 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -2309,60 +2437,60 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_v8s8 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; 
CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -2378,61 +2506,76 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[OR]], [[OR]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[OR1]], [[OR1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[OR2]], [[OR2]] - 
; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[OR3]], [[OR3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: 
[[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[OR]], [[OR]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[OR1]], [[OR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[OR2]], [[OR2]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[OR3]], [[OR3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<8 x s8>) = G_TRUNC %0 %2:_(<4 x s16>) = G_BITCAST %1 @@ -2448,43 +2591,43 @@ body: | ; CHECK-LABEL: name: test_bitcast_v64s32_to_v32s64 ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>), [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>), [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s32>), [[UV17:%[0-9]+]]:_(<2 x s32>), [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>), [[UV20:%[0-9]+]]:_(<2 x s32>), [[UV21:%[0-9]+]]:_(<2 x s32>), [[UV22:%[0-9]+]]:_(<2 x s32>), [[UV23:%[0-9]+]]:_(<2 x s32>), [[UV24:%[0-9]+]]:_(<2 x s32>), [[UV25:%[0-9]+]]:_(<2 x s32>), [[UV26:%[0-9]+]]:_(<2 x s32>), [[UV27:%[0-9]+]]:_(<2 x s32>), [[UV28:%[0-9]+]]:_(<2 x 
s32>), [[UV29:%[0-9]+]]:_(<2 x s32>), [[UV30:%[0-9]+]]:_(<2 x s32>), [[UV31:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[UV]](<2 x s32>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[UV1]](<2 x s32>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s64) = G_BITCAST [[UV2]](<2 x s32>) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[UV3]](<2 x s32>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s64) = G_BITCAST [[UV4]](<2 x s32>) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s64) = G_BITCAST [[UV5]](<2 x s32>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s64) = G_BITCAST [[UV6]](<2 x s32>) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s64) = G_BITCAST [[UV7]](<2 x s32>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s64) = G_BITCAST [[UV8]](<2 x s32>) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s64) = G_BITCAST [[UV9]](<2 x s32>) - ; CHECK: [[BITCAST10:%[0-9]+]]:_(s64) = G_BITCAST [[UV10]](<2 x s32>) - ; CHECK: [[BITCAST11:%[0-9]+]]:_(s64) = G_BITCAST [[UV11]](<2 x s32>) - ; CHECK: [[BITCAST12:%[0-9]+]]:_(s64) = G_BITCAST [[UV12]](<2 x s32>) - ; CHECK: [[BITCAST13:%[0-9]+]]:_(s64) = G_BITCAST [[UV13]](<2 x s32>) - ; CHECK: [[BITCAST14:%[0-9]+]]:_(s64) = G_BITCAST [[UV14]](<2 x s32>) - ; CHECK: [[BITCAST15:%[0-9]+]]:_(s64) = G_BITCAST [[UV15]](<2 x s32>) - ; CHECK: [[BITCAST16:%[0-9]+]]:_(s64) = G_BITCAST [[UV16]](<2 x s32>) - ; CHECK: [[BITCAST17:%[0-9]+]]:_(s64) = G_BITCAST [[UV17]](<2 x s32>) - ; CHECK: [[BITCAST18:%[0-9]+]]:_(s64) = G_BITCAST [[UV18]](<2 x s32>) - ; CHECK: [[BITCAST19:%[0-9]+]]:_(s64) = G_BITCAST [[UV19]](<2 x s32>) - ; CHECK: [[BITCAST20:%[0-9]+]]:_(s64) = G_BITCAST [[UV20]](<2 x s32>) - ; CHECK: [[BITCAST21:%[0-9]+]]:_(s64) = G_BITCAST [[UV21]](<2 x s32>) - ; CHECK: [[BITCAST22:%[0-9]+]]:_(s64) = G_BITCAST [[UV22]](<2 x s32>) - ; CHECK: [[BITCAST23:%[0-9]+]]:_(s64) = G_BITCAST [[UV23]](<2 x s32>) - ; CHECK: [[BITCAST24:%[0-9]+]]:_(s64) = G_BITCAST [[UV24]](<2 x s32>) - ; CHECK: [[BITCAST25:%[0-9]+]]:_(s64) = G_BITCAST [[UV25]](<2 x s32>) - ; CHECK: [[BITCAST26:%[0-9]+]]:_(s64) = G_BITCAST [[UV26]](<2 x s32>) - ; CHECK: [[BITCAST27:%[0-9]+]]:_(s64) = G_BITCAST [[UV27]](<2 x s32>) - ; CHECK: [[BITCAST28:%[0-9]+]]:_(s64) = G_BITCAST [[UV28]](<2 x s32>) - ; CHECK: [[BITCAST29:%[0-9]+]]:_(s64) = G_BITCAST [[UV29]](<2 x s32>) - ; CHECK: [[BITCAST30:%[0-9]+]]:_(s64) = G_BITCAST [[UV30]](<2 x s32>) - ; CHECK: [[BITCAST31:%[0-9]+]]:_(s64) = G_BITCAST [[UV31]](<2 x s32>) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST]](s64), [[BITCAST1]](s64), [[BITCAST2]](s64), [[BITCAST3]](s64), [[BITCAST4]](s64), [[BITCAST5]](s64), [[BITCAST6]](s64), [[BITCAST7]](s64), [[BITCAST8]](s64), [[BITCAST9]](s64), [[BITCAST10]](s64), [[BITCAST11]](s64), [[BITCAST12]](s64), [[BITCAST13]](s64), [[BITCAST14]](s64), [[BITCAST15]](s64) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST16]](s64), [[BITCAST17]](s64), [[BITCAST18]](s64), [[BITCAST19]](s64), [[BITCAST20]](s64), [[BITCAST21]](s64), [[BITCAST22]](s64), [[BITCAST23]](s64), [[BITCAST24]](s64), [[BITCAST25]](s64), [[BITCAST26]](s64), [[BITCAST27]](s64), [[BITCAST28]](s64), [[BITCAST29]](s64), [[BITCAST30]](s64), [[BITCAST31]](s64) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>), implicit [[BUILD_VECTOR1]](<16 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), 
[[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>), [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>), [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s32>), [[UV17:%[0-9]+]]:_(<2 x s32>), [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>), [[UV20:%[0-9]+]]:_(<2 x s32>), [[UV21:%[0-9]+]]:_(<2 x s32>), [[UV22:%[0-9]+]]:_(<2 x s32>), [[UV23:%[0-9]+]]:_(<2 x s32>), [[UV24:%[0-9]+]]:_(<2 x s32>), [[UV25:%[0-9]+]]:_(<2 x s32>), [[UV26:%[0-9]+]]:_(<2 x s32>), [[UV27:%[0-9]+]]:_(<2 x s32>), [[UV28:%[0-9]+]]:_(<2 x s32>), [[UV29:%[0-9]+]]:_(<2 x s32>), [[UV30:%[0-9]+]]:_(<2 x s32>), [[UV31:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[UV]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[UV1]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s64) = G_BITCAST [[UV2]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[UV3]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s64) = G_BITCAST [[UV4]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s64) = G_BITCAST [[UV5]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s64) = G_BITCAST [[UV6]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s64) = G_BITCAST [[UV7]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s64) = G_BITCAST [[UV8]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s64) = G_BITCAST [[UV9]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s64) = G_BITCAST [[UV10]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s64) = G_BITCAST [[UV11]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(s64) = G_BITCAST [[UV12]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(s64) = G_BITCAST [[UV13]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(s64) = G_BITCAST [[UV14]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(s64) = G_BITCAST [[UV15]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s64) = G_BITCAST [[UV16]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s64) = G_BITCAST [[UV17]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s64) = G_BITCAST [[UV18]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(s64) = G_BITCAST [[UV19]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(s64) = G_BITCAST [[UV20]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(s64) = G_BITCAST [[UV21]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(s64) = G_BITCAST [[UV22]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST23:%[0-9]+]]:_(s64) = G_BITCAST [[UV23]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST24:%[0-9]+]]:_(s64) = G_BITCAST [[UV24]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST25:%[0-9]+]]:_(s64) = G_BITCAST [[UV25]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST26:%[0-9]+]]:_(s64) = G_BITCAST [[UV26]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST27:%[0-9]+]]:_(s64) = G_BITCAST [[UV27]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST28:%[0-9]+]]:_(s64) = G_BITCAST [[UV28]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST29:%[0-9]+]]:_(s64) = G_BITCAST [[UV29]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST30:%[0-9]+]]:_(s64) = G_BITCAST [[UV30]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST31:%[0-9]+]]:_(s64) = G_BITCAST [[UV31]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST]](s64), [[BITCAST1]](s64), [[BITCAST2]](s64), [[BITCAST3]](s64), [[BITCAST4]](s64), [[BITCAST5]](s64), [[BITCAST6]](s64), [[BITCAST7]](s64), [[BITCAST8]](s64), [[BITCAST9]](s64), 
[[BITCAST10]](s64), [[BITCAST11]](s64), [[BITCAST12]](s64), [[BITCAST13]](s64), [[BITCAST14]](s64), [[BITCAST15]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST16]](s64), [[BITCAST17]](s64), [[BITCAST18]](s64), [[BITCAST19]](s64), [[BITCAST20]](s64), [[BITCAST21]](s64), [[BITCAST22]](s64), [[BITCAST23]](s64), [[BITCAST24]](s64), [[BITCAST25]](s64), [[BITCAST26]](s64), [[BITCAST27]](s64), [[BITCAST28]](s64), [[BITCAST29]](s64), [[BITCAST30]](s64), [[BITCAST31]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>), implicit [[BUILD_VECTOR1]](<16 x s64>) %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:_(<64 x s32>) = G_CONCAT_VECTORS %0, %0 %2:_(<32 x s64>) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir index 792d064567e6..846e528f9527 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: concat_vectors_v2s32_v2s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index e93b5d047146..c624320344b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s --- name: extract_vector_elt_0_v2i32 @@ -9,8 +9,9 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_0_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -24,8 +25,9 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_1_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -39,8 +41,9 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: 
extract_vector_elt_2_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -54,8 +57,9 @@ body: | liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-LABEL: name: extract_vector_elt_0_v3i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -69,8 +73,9 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: extract_vector_elt_0_v4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -205,8 +210,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -241,8 +247,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -258,8 +265,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_CONSTANT i1 false %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -301,11 +309,8 @@ body: | ; 
CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -328,9 +333,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1017,8 +1021,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1036,8 +1041,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 1 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1055,8 +1061,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 2 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1164,8 +1171,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<2 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = 
COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1181,8 +1189,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v8i64 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<8 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) %0:_(<8 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1198,8 +1207,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v16i64 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<16 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) %0:_(<16 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1215,8 +1225,9 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_CONSTANT i64 0 %2:_(s32) = G_TRUNC %1 @@ -1234,8 +1245,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_7_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 224 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), 
align 4, addrspace 4) @@ -1255,8 +1267,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 32 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1294,218 +1307,13 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_33_v64p3 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(p3), [[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), 
[[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST1]](<16 x p3>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST2]](<16 x p3>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), [[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST3]](<16 x p3>) - ; CHECK-NEXT: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store (p3) into %stack.0, align 256, addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store (p3) into %stack.0 + 4, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store (p3) into %stack.0 + 8, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store (p3) into %stack.0 + 12, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store (p3) into %stack.0 + 16, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK-NEXT: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store (p3) into %stack.0 + 20, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK-NEXT: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store (p3) into %stack.0 + 24, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK-NEXT: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store (p3) into %stack.0 + 28, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK-NEXT: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store (p3) into %stack.0 + 32, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK-NEXT: G_STORE [[UV9]](p3), 
[[PTR_ADD11]](p5) :: (store (p3) into %stack.0 + 36, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK-NEXT: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store (p3) into %stack.0 + 40, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK-NEXT: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store (p3) into %stack.0 + 44, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK-NEXT: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store (p3) into %stack.0 + 48, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK-NEXT: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store (p3) into %stack.0 + 52, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store (p3) into %stack.0 + 56, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store (p3) into %stack.0 + 60, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store (p3) into %stack.0 + 64, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store (p3) into %stack.0 + 68, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store (p3) into %stack.0 + 72, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK-NEXT: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store (p3) into %stack.0 + 76, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK-NEXT: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store (p3) into %stack.0 + 80, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK-NEXT: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store (p3) into %stack.0 + 84, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK-NEXT: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store (p3) into %stack.0 + 88, align 8, 
basealign 256, addrspace 5) - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK-NEXT: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store (p3) into %stack.0 + 92, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK-NEXT: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store (p3) into %stack.0 + 96, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK-NEXT: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store (p3) into %stack.0 + 100, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK-NEXT: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store (p3) into %stack.0 + 104, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK-NEXT: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store (p3) into %stack.0 + 108, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK-NEXT: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store (p3) into %stack.0 + 112, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK-NEXT: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store (p3) into %stack.0 + 116, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK-NEXT: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store (p3) into %stack.0 + 120, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store (p3) into %stack.0 + 124, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store (p3) into %stack.0 + 128, align 128, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK-NEXT: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store (p3) into %stack.0 + 132, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store (p3) into %stack.0 + 136, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store (p3) 
into %stack.0 + 140, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK-NEXT: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store (p3) into %stack.0 + 144, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK-NEXT: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store (p3) into %stack.0 + 148, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK-NEXT: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store (p3) into %stack.0 + 152, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK-NEXT: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store (p3) into %stack.0 + 156, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK-NEXT: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store (p3) into %stack.0 + 160, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK-NEXT: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store (p3) into %stack.0 + 164, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK-NEXT: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store (p3) into %stack.0 + 168, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK-NEXT: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store (p3) into %stack.0 + 172, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK-NEXT: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store (p3) into %stack.0 + 176, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK-NEXT: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store (p3) into %stack.0 + 180, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK-NEXT: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store (p3) into %stack.0 + 184, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK-NEXT: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store (p3) into %stack.0 + 188, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK-NEXT: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store (p3) into %stack.0 + 192, align 64, 
basealign 256, addrspace 5) - ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK-NEXT: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store (p3) into %stack.0 + 196, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK-NEXT: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store (p3) into %stack.0 + 200, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK-NEXT: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store (p3) into %stack.0 + 204, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK-NEXT: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store (p3) into %stack.0 + 208, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK-NEXT: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store (p3) into %stack.0 + 212, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK-NEXT: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store (p3) into %stack.0 + 216, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK-NEXT: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store (p3) into %stack.0 + 220, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK-NEXT: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store (p3) into %stack.0 + 224, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK-NEXT: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store (p3) into %stack.0 + 228, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK-NEXT: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store (p3) into %stack.0 + 232, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK-NEXT: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store (p3) into %stack.0 + 236, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK-NEXT: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store (p3) into %stack.0 + 240, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK-NEXT: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store (p3) into %stack.0 + 244, basealign 256, addrspace 5) - ; 
CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK-NEXT: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store (p3) into %stack.0 + 248, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK-NEXT: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store (p3) into %stack.0 + 252, basealign 256, addrspace 5) - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load (p3) from %stack.0 + 132, addrspace 5) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LOAD4]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index e8e863a964c9..ba80462858f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -187,8 +187,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v2s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -201,8 +202,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v2s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -215,8 +217,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -229,8 +232,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -243,8 +247,9 @@ body: | ; 
CHECK-LABEL: name: test_extract_s32_v3s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 64 $vgpr0 = COPY %1 @@ -257,8 +262,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -313,8 +319,9 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -328,8 +335,9 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 32 $vgpr0_vgpr1 = COPY %1 @@ -343,8 +351,9 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -357,8 +366,9 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 0 
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -372,8 +382,9 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_EXTRACT %0, 32 $vgpr0_vgpr1 = COPY %1 @@ -387,8 +398,9 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -401,8 +413,9 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -416,8 +429,9 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = G_EXTRACT %0, 32 $vgpr0_vgpr1 = COPY %1 @@ -431,8 +445,9 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES 
[[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -444,13 +459,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 0 %2:_(s32) = G_ANYEXT %1 @@ -463,13 +473,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 8 %2:_(s32) = G_ANYEXT %1 @@ -482,13 +487,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -501,13 +501,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset24 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC 
[[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 24 %2:_(s32) = G_ANYEXT %1 @@ -522,26 +517,7 @@ body: | ; CHECK-LABEL: name: extract_s8_v3s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -555,21 +531,7 @@ body: | ; CHECK-LABEL: name: extract_s8_v5s1_offset4 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), 
[[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF2]](<2 x s16>), [[DEF2]](<2 x s16>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF3]], [[UV5]](<5 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 %2:_(s32) = G_ANYEXT %1 @@ -582,8 +544,17 @@ body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<4 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -595,8 +566,17 @@ body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -823,13 +803,9 @@ body: | ; CHECK-LABEL: name: extract_s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT 
[[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(s16) = G_EXTRACT %0, 0 %2:_(s32) = G_ANYEXT %1 @@ -908,12 +884,17 @@ body: | ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -926,12 +907,17 @@ body: | ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir index 5f772be31f72..16bb69c872ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- @@ -231,25 +231,19 @@ body: | ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], 
[[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -274,25 +268,19 @@ body: | ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -309,12 +297,9 @@ body: | ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[BITCAST5]](s32) 
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index 497d0ceeb620..aaff85970bec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fadd_s32 @@ -446,21 +446,21 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; 
GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir index fc5d260bd826..6631fbf4ec7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir index 61ee24f03151..6c7b5d73a016 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fcos_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir index c25666e8cced..f230dbc1ae68 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -1,9 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -enable-unsafe-fp-math -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_fdiv_s16 @@ -20,75 +20,75 @@ body: | ; SI-LABEL: name: test_fdiv_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fdiv_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fdiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: 
[[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fdiv_s16 ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -113,78 +113,78 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_denorms_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA 
[[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_denorms_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), 
[[COPY1]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_on ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) ; GFX10-LABEL: name: test_fdiv_s32_denorms_on ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FDIV %0, %1 @@ -206,86 +206,86 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def 
$mode, implicit $mode + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_denorms_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_denorms_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), 
[[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) ; GFX10-LABEL: name: test_fdiv_s32_denorms_off ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: 
[[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FDIV %0, %1 @@ -307,86 +307,86 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit 
$mode - ; SI: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: 
[[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = 
arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) ; GFX10-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) 
- ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = arcp G_FDIV %0, %1 @@ -408,93 +408,93 @@ body: | ; SI-LABEL: name: test_fdiv_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; SI: $vgpr0_vgpr1 = COPY 
[[INT6]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; VI-LABEL: name: test_fdiv_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; 
VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-LABEL: name: test_fdiv_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[COPY]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[COPY]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) ; GFX10-LABEL: name: test_fdiv_s64 ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; GFX10: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: 
[[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_FDIV %0, %1 @@ -516,159 +516,159 @@ body: | ; SI-LABEL: name: test_fdiv_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; 
SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: 
[[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fdiv_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; VI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; VI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fdiv_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), 
[[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX9: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = 
G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s32 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>) ; GFX10-LABEL: name: test_fdiv_v2s32 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX10: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), 
[[UV2]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FDIV %0, %1 @@ -683,143 +683,143 @@ body: | ; SI-LABEL: name: test_fdiv_v2s32_flags ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fdiv_v2s32_flags ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; VI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; VI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - 
; VI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; VI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; VI-NEXT: 
$vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fdiv_v2s32_flags ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX9: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), 
[[UV2]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s32_flags ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: 
[[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-LABEL: name: test_fdiv_v2s32_flags ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX10: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = nnan G_FDIV %0, %1 @@ -834,193 +834,193 @@ body: | ; SI-LABEL: name: test_fdiv_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) 
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fdiv_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; VI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; VI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; VI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; VI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; VI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; VI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; VI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; VI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; VI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; VI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], 
[[INT16]] - ; VI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; VI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; VI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; VI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; VI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; VI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; VI-NEXT: 
[[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; VI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; VI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; VI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; VI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; VI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; VI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; VI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fdiv_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; GFX9: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), 
[[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; GFX9: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; GFX9: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; GFX9: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; GFX9: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX9: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; GFX9: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; GFX9: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; GFX9: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; GFX9: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX9: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; GFX9: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: 
[[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; GFX9-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; GFX9-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; GFX9-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; GFX9-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; GFX9-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; GFX9-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; GFX9-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; GFX9-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; GFX9-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s32 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-UNSAFE: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32) - ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX10-LABEL: name: test_fdiv_v3s32 ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; GFX10: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; GFX10: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; GFX10: [[INT16:%[0-9]+]]:_(s32), 
[[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; GFX10: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; GFX10: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX10: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; GFX10: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; GFX10: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; GFX10: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; GFX10: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX10: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; GFX10: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: 
[[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; GFX10-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; GFX10-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; GFX10-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; GFX10-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; GFX10-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; GFX10-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; GFX10-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; GFX10-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; GFX10-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; GFX10-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; GFX10-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_FDIV %0, %1 @@ -1035,172 +1035,172 @@ body: | ; SI-LABEL: name: test_fdiv_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]] - ; SI: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; SI: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; SI: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; SI: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; SI: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; SI: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; SI: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; SI: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; SI: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; SI: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; SI: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT7]](s64) - ; SI: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64) - ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV13]](s32), [[UV19]] - ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV15]](s32), [[UV17]] - ; SI: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP3]] - ; SI: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[XOR1]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[UV2]](s64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; SI-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; SI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; SI-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; SI-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT7]](s64) + ; SI-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV13]](s32), [[UV19]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV15]](s32), [[UV17]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP3]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[XOR1]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_fdiv_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI: 
[[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; VI: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; VI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; VI: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; VI: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; VI: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; VI: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; VI: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; VI: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; VI: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; VI: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; VI-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_fdiv_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; GFX9: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; GFX9: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; GFX9: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; GFX9: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; GFX9: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; GFX9: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; GFX9: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), 
[[FMUL1]](s64), [[INT11]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; GFX9-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s64 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES 
[[COPY1]](<2 x s64>) - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[UV]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64) - ; GFX9-UNSAFE: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT1]], [[C]] - ; GFX9-UNSAFE: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[INT1]], [[INT1]] - ; GFX9-UNSAFE: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA7]], [[C]] - ; GFX9-UNSAFE: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FMA8]], [[FMA7]], [[FMA7]] - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[FMA9]] - ; GFX9-UNSAFE: [[FMA10:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[UV1]] - ; GFX9-UNSAFE: [[FMA11:%[0-9]+]]:_(s64) = G_FMA [[FMA10]], [[FMA9]], [[FMUL1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA5]](s64), [[FMA11]](s64) - ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[UV]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[INT1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA7]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FMA8]], [[FMA7]], [[FMA7]] + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[FMA9]] + ; GFX9-UNSAFE-NEXT: [[FMA10:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[UV1]] + ; GFX9-UNSAFE-NEXT: [[FMA11:%[0-9]+]]:_(s64) = G_FMA [[FMA10]], [[FMA9]], [[FMUL1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = 
G_BUILD_VECTOR [[FMA5]](s64), [[FMA11]](s64) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX10-LABEL: name: test_fdiv_v2s64 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; GFX10: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; GFX10: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; GFX10: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; GFX10: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; GFX10: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; GFX10: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; GFX10: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; GFX10-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_FDIV %0, %1 @@ -1215,159 +1215,159 @@ body: | ; SI-LABEL: name: test_fdiv_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), 
[[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: 
[[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fdiv_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: 
[[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; VI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fdiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; 
GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-UNSAFE: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX10-LABEL: name: test_fdiv_v2s16 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX10: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) 
= G_FDIV %0, %1 @@ -1382,225 +1382,225 @@ body: | ; SI-LABEL: name: test_fdiv_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 - ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 - ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] - ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; SI-NEXT: 
[[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fdiv_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; VI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; VI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; VI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; VI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) 
= G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fdiv_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; GFX9: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX9: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX9: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = 
G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16 ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) - ; GFX9-UNSAFE: 
[[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) - ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-UNSAFE: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX10-LABEL: name: test_fdiv_v3s16 ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; GFX10: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; GFX10: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX10: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX10: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX10: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10-NEXT: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX10-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX10-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FDIV %0, %1 @@ -1616,303 +1616,303 @@ body: | ; SI-LABEL: name: test_fdiv_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: 
[[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 - ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 - ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] - ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA 
[[FNEG2]], [[FMA13]], [[INT16]] - ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[INT21:%[0-9]+]]:_(s32), [[INT22:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 0 - ; SI: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 1 - ; SI: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32) - ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]] - ; SI: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] - ; SI: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] - ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]] - ; SI: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] - ; SI: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] - ; SI: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] - ; SI: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) - ; SI: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 
+ ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 + ; SI-NEXT: 
[[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[INT21:%[0-9]+]]:_(s32), [[INT22:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 0 + ; SI-NEXT: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 1 + ; SI-NEXT: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32) + ; SI-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]] + ; SI-NEXT: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] + ; SI-NEXT: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] + ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]] + ; SI-NEXT: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] + ; SI-NEXT: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] + ; SI-NEXT: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] + ; SI-NEXT: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) + ; SI-NEXT: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fdiv_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - 
; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; VI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; VI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; VI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; VI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; VI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; VI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; VI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) + ; VI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; VI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] + ; VI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) + ; 
VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fdiv_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; GFX9: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX9: 
[[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX9: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; GFX9: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; GFX9: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; 
GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] + ; GFX9-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v4s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], 
[[C]](s32) - ; GFX9-UNSAFE: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC6]](s16) - ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) - ; GFX9-UNSAFE: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; GFX9-UNSAFE: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL3]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), 
[[TRUNC6]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL3]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-LABEL: name: test_fdiv_v4s16 ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; GFX10: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; GFX10: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC2]](s16) - ; GFX10: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX10: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX10: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; GFX10: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; GFX10: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX10-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) + ; GFX10-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX10-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] + ; GFX10-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX10-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FDIV %0, %1 @@ -1927,68 +1927,68 @@ body: | ; SI-LABEL: name: test_fdiv_s16_constant_one_rcp ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] 
- ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fdiv_s16_constant_one_rcp ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; VI-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; 
GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s16) = G_FCONSTANT half 1.0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %1 @@ -2005,69 +2005,69 @@ body: | ; SI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY]](s32) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s16) = G_FCONSTANT half -1.0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %1 @@ -2084,72 +2084,72 @@ body: | ; SI-LABEL: name: test_fdiv_s32_constant_one_rcp ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; SI-NEXT: 
[[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_constant_one_rcp ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INT:%[0-9]+]]:_(s32), 
[[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[INT]](s32) ; GFX10-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: 
[[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = G_FCONSTANT float 1.0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FDIV %0, %1 @@ -2164,77 +2164,77 @@ body: | ; SI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; VI-NEXT: $vgpr0 = COPY 
[[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[INT]](s32) ; GFX10-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = G_FCONSTANT float -1.0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FDIV %0, %1 @@ -2256,88 +2256,88 @@ body: | ; SI-LABEL: name: test_fdiv_s64_constant_one_rcp ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[C]](s64) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; VI-LABEL: name: test_fdiv_s64_constant_one_rcp ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], 
[[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; 
GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) ; GFX10-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), 
[[C]](s64) - ; GFX10: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) %0:_(s64) = G_FCONSTANT double 1.0 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s64) = G_FDIV %0, %1 @@ -2359,93 +2359,93 @@ body: | ; SI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; VI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA 
[[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX9-UNSAFE: 
[[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-UNSAFE: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C1]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] + ; GFX9-UNSAFE-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C1]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) ; GFX10-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX10: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = 
G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) %0:_(s64) = G_FCONSTANT double -1.0 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s64) = G_FDIV %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir index c60f86af5e7e..0abe9ef1cb06 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -12,22 +12,22 @@ body: | ; GFX6-LABEL: name: test_fexp_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX8-LABEL: name: test_fexp_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX8: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX9-LABEL: name: test_fexp_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FEXP %0 $vgpr0 = COPY %1 @@ -41,22 +41,22 @@ body: | ; GFX6-LABEL: name: test_fexp_s32_nnan ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] - ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX8-LABEL: name: test_fexp_s32_nnan ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] - ; GFX8: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX9-LABEL: name: test_fexp_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] - ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan G_FEXP %0 $vgpr0 = COPY %1 @@ -70,34 +70,34 @@ body: | ; GFX6-LABEL: name: test_fexp_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_fexp_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX8: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] 
+ ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fexp_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FEXP %0 $vgpr0_vgpr1 = COPY %1 @@ -111,40 +111,40 @@ body: | ; GFX6-LABEL: name: test_fexp_v3s32 ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX6: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX6-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX8-LABEL: name: test_fexp_v3s32 ; GFX8: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX8: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX8: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX8-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fexp_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX9: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX9-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FEXP %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -158,30 +158,30 @@ body: | ; GFX6-LABEL: name: test_fexp_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6: $vgpr0 = COPY 
[[ANYEXT]](s32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_fexp_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX8: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fexp_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_FEXP %1 @@ -197,47 +197,47 @@ body: | ; GFX6-LABEL: name: test_fexp_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] - ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FEXP2_]](s32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX8-LABEL: name: test_fexp_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX8: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX8: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] - ; GFX8: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_1]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_1]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_fexp_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] - ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 
x s16>) = G_FEXP %1 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir index 413df7a3ed3d..8ab87e06338b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_ffloor_s32 @@ -12,16 +12,16 @@ body: | ; SI-LABEL: name: test_ffloor_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; SI: $vgpr0 = COPY [[FFLOOR]](s32) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) ; VI-LABEL: name: test_ffloor_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; VI: $vgpr0 = COPY [[FFLOOR]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) ; GFX9-LABEL: name: test_ffloor_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; GFX9: $vgpr0 = COPY [[FFLOOR]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -35,22 +35,22 @@ body: | ; SI-LABEL: name: test_ffloor_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD 
[[COPY]], [[FNEG]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; VI-LABEL: name: test_ffloor_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) ; GFX9-LABEL: name: test_ffloor_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -65,20 +65,20 @@ body: | ; SI-LABEL: name: test_ffloor_s64_nnan ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]] - ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; VI-LABEL: name: test_ffloor_s64_nnan ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) ; GFX9-LABEL: name: test_ffloor_s64_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = nnan G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -93,22 +93,22 @@ body: | ; SI-LABEL: name: test_ffloor_s64_nssaz ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nsz G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = nsz G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]] - ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nsz G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = nsz G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] + ; SI-NEXT: 
[[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; VI-LABEL: name: test_ffloor_s64_nssaz ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) ; GFX9-LABEL: name: test_ffloor_s64_nssaz ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = nsz G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -123,24 +123,24 @@ body: | ; SI-LABEL: name: test_ffloor_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_ffloor_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_ffloor_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_FFLOOR %1 @@ -156,25 +156,25 @@ body: | ; SI-LABEL: name: test_ffloor_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_ffloor_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_ffloor_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -188,28 +188,28 @@ body: | ; SI-LABEL: name: test_ffloor_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_ffloor_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; VI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_ffloor_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FFLOOR %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -223,36 +223,36 @@ body: | ; SI-LABEL: name: test_ffloor_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV]](s64), [[UV]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FMINNUM_IEEE]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](s64) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C]] - ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV1]](s64), [[UV1]] - ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FMINNUM_IEEE1]] - ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV]](s64), [[UV]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] 
+ ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](s64) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C]] + ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV1]](s64), [[UV1]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FMINNUM_IEEE1]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_ffloor_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_ffloor_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_FFLOOR %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -266,51 +266,51 @@ body: | ; SI-LABEL: name: test_ffloor_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_ffloor_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_ffloor_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), 
[[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -323,64 +323,64 @@ body: | ; SI-LABEL: name: test_ffloor_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR 
[[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_ffloor_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_ffloor_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR 
[[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FFLOOR %0 %2:_(<3 x s32>) = G_ANYEXT %1 @@ -395,92 +395,92 @@ body: | ; SI-LABEL: name: test_ffloor_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ffloor_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = 
G_FFLOOR [[TRUNC1]] - ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; VI-NEXT: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ffloor_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: 
[[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; GFX9-NEXT: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index e28dc5e9b1e6..fed4cfcc793a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# 
RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fma_s32 @@ -12,22 +12,22 @@ body: | ; SI-LABEL: name: test_fma_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; SI: $vgpr0 = COPY [[FMA]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; SI-NEXT: $vgpr0 = COPY [[FMA]](s32) ; VI-LABEL: name: test_fma_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; VI: $vgpr0 = COPY [[FMA]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; VI-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX9-LABEL: name: test_fma_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -42,22 +42,22 @@ body: | ; SI-LABEL: name: test_fma_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; SI: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) ; VI-LABEL: name: test_fma_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; VI: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) ; GFX9-LABEL: name: test_fma_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; 
GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -73,38 +73,38 @@ body: | ; SI-LABEL: name: test_fma_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fma_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fma_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr1 @@ -125,37 +125,37 @@ body: | ; SI-LABEL: name: test_fma_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fma_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fma_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY 
$vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -171,40 +171,40 @@ body: | ; SI-LABEL: name: test_fma_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fma_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; VI: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fma_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) 
= COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -220,43 +220,43 @@ body: | ; SI-LABEL: name: test_fma_v4s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_fma_v4s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = 
COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_fma_v4s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -272,37 +272,37 @@ body: | ; SI-LABEL: name: test_fma_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY 
$vgpr4_vgpr5_vgpr6_vgpr7 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_fma_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_fma_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; 
GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -318,68 +318,68 @@ body: | ; SI-LABEL: name: test_fma_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_fma_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-LABEL: name: test_fma_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0 = COPY [[FMA]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -395,159 +395,159 @@ body: | ; SI-LABEL: name: test_fma_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; SI-NEXT: 
[[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) 
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fma_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] - ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), 
[[BITCAST10]](<2 x s16>) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: 
[[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) - ; GFX9: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST9]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS 
[[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), 
[[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 @@ -569,127 +569,127 @@ body: | ; SI-LABEL: name: test_fma_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; SI: 
[[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: 
[[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) + ; SI-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fma_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR4]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] - ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] - ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA 
[[TRUNC]], [[TRUNC4]], [[TRUNC8]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 0aa015803cd9..acae7a10d509 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc 
-mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fmaxnum_s32_ieee_mode_on @@ -15,25 +15,25 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -51,19 +51,19 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = 
G_FMAXNUM [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -78,19 +78,19 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = nnan G_FMAXNUM %0, %1 @@ -106,22 +106,22 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + 
; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -137,22 +137,22 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -167,19 +167,19 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY 
[[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -194,25 +194,25 @@ body: | ; SI-LABEL: name: test_fmaxnum_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) ; VI-LABEL: name: test_fmaxnum_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_FMAXNUM %0, %1 @@ -227,35 +227,35 @@ body: | ; SI-LABEL: name: test_fmaxnum_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: 
[[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fmaxnum_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -273,43 +273,43 @@ body: | ; SI-LABEL: name: test_fmaxnum_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FMAXNUM %0, %1 @@ -324,61 +324,61 @@ body: | ; SI-LABEL: name: test_fmaxnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_FMAXNUM %0, %1 @@ -392,137 +392,143 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-LABEL: name: test_fmaxnum_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - 
; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fmaxnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) 
= G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] - ; 
GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + 
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -542,113 +548,113 @@ body: | ; SI-LABEL: name: test_fmaxnum_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE 
[[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) 
= G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] + ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] + ; VI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = 
G_ZEXT [[FMAXNUM_IEEE3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FMAXNUM %0, %1 @@ -666,37 +672,37 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) 
= G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; VI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -716,31 +722,31 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: 
[[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMAXNUM %0, %1 @@ -760,37 +766,37 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: 
[[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = 
G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -810,31 +816,31 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMINNUM %0, %1 @@ -854,22 +860,22 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMAXNUM %0, %1 @@ -887,54 +893,54 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16) = G_FCONSTANT half 0xH0000 %2:_(<2 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index 9b6b39b01c38..7c8934a28edd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fminnum_s32_ieee_mode_on @@ -15,25 +15,25 @@ body: | ; SI-LABEL: name: test_fminnum_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: 
[[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -51,19 +51,19 @@ body: | ; SI-LABEL: name: test_fminnum_s32_ieee_mode_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -78,19 
+78,19 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = nnan G_FMINNUM %0, %1 @@ -106,22 +106,22 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_lhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan_lhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -137,22 +137,22 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY 
$vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -167,19 +167,19 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -194,25 +194,25 @@ body: | ; SI-LABEL: name: test_fminnum_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; SI: 
[[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) ; VI-LABEL: name: test_fminnum_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) ; GFX9-LABEL: name: test_fminnum_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_FMINNUM %0, %1 @@ -227,35 +227,35 @@ body: | ; SI-LABEL: name: test_fminnum_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fminnum_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fminnum_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -273,43 +273,43 @@ body: | ; SI-LABEL: name: test_fminnum_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: 
[[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fminnum_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fminnum_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FMINNUM %0, %1 @@ -324,61 +324,61 @@ body: | ; SI-LABEL: name: test_fminnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: 
[[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fminnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_FMINNUM %0, %1 @@ -392,137 +392,143 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-LABEL: name: test_fminnum_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV6]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x 
s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), 
[[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -542,113 +548,113 @@ body: | ; SI-LABEL: name: test_fminnum_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: 
$vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] + ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] + ; VI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x 
s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FMINNUM %0, %1 @@ -666,37 +672,37 @@ body: | ; SI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; VI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = 
G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -716,31 +722,31 @@ body: | ; SI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; VI-LABEL: 
name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMINNUM %0, %1 @@ -760,37 +766,37 @@ body: | ; SI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = 
G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -810,31 +816,31 @@ body: | ; SI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: 
[[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMAXNUM %0, %1 @@ -854,22 +860,22 @@ body: | ; SI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE 
[[FCANONICALIZE]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMINNUM %0, %1 @@ -887,54 +893,54 @@ body: | ; SI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; 
SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16) = G_FCONSTANT half 0xH0000 %2:_(<2 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index 970c9f64565f..5e86ea25e674 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s --- name: test_fmul_s32 @@ -433,21 +433,21 @@ body: | ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9PLUS-NEXT: 
[[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>) ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir index cdf9fab798c8..e8662438fb50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fneg_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir index 2143bdcc85a3..bcd4627aadd7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s --- name: test_fpext_f16_to_f32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index ddf5ec6db384..f0d91b08d007 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -223,26 +223,9 @@ body: | ; CHECK-LABEL: name: test_freeze_s1056 ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV32]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV2]](s33792) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1056) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1056) = G_ANYEXT 
[[COPY]](s512) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1056) = G_FREEZE [[ANYEXT]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1056) %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s1056) = G_ANYEXT %0 %2:_(s1056) = G_FREEZE %1 @@ -417,19 +400,18 @@ body: | ; CHECK-LABEL: name: test_freeze_v33s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32) ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]] ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), 
[[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(<33 x s32>), [[UV49:%[0-9]+]]:_(<33 x s32>), [[UV50:%[0-9]+]]:_(<33 x s32>), [[UV51:%[0-9]+]]:_(<33 x s32>), [[UV52:%[0-9]+]]:_(<33 x s32>), [[UV53:%[0-9]+]]:_(<33 x s32>), [[UV54:%[0-9]+]]:_(<33 x s32>), [[UV55:%[0-9]+]]:_(<33 x s32>), [[UV56:%[0-9]+]]:_(<33 x s32>), [[UV57:%[0-9]+]]:_(<33 x s32>), [[UV58:%[0-9]+]]:_(<33 x s32>), [[UV59:%[0-9]+]]:_(<33 x s32>), [[UV60:%[0-9]+]]:_(<33 x s32>), [[UV61:%[0-9]+]]:_(<33 x s32>), [[UV62:%[0-9]+]]:_(<33 x s32>), [[UV63:%[0-9]+]]:_(<33 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<528 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[UV48]](<33 x s32>) + ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[UV33]](s32), [[UV34]](s32), [[UV35]](s32), [[UV36]](s32), [[UV37]](s32), [[UV38]](s32), [[UV39]](s32), [[UV40]](s32), [[UV41]](s32), [[UV42]](s32), [[UV43]](s32), [[UV44]](s32), [[UV45]](s32), [[UV46]](s32), [[UV47]](s32), [[UV48]](s32), [[UV49]](s32), [[UV50]](s32), [[UV51]](s32), [[UV52]](s32), [[UV53]](s32), [[UV54]](s32), [[UV55]](s32), [[UV56]](s32), [[UV57]](s32), [[UV58]](s32), [[UV59]](s32), [[UV60]](s32), [[UV61]](s32), [[UV62]](s32), [[UV63]](s32), [[FREEZE2]](s32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 S_NOP 0, implicit %1 @@ -530,16 +512,13 @@ body: | ; CHECK-LABEL: name: test_freeze_v3s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) 
= G_ANYEXT [[UV]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s8>) = G_TRUNC %0 %2:_(<3 x s8>) = G_FREEZE %1 @@ -569,24 +548,26 @@ body: | ; CHECK-LABEL: name: test_freeze_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]] - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s16>) = G_FREEZE %1 @@ -616,28 +597,33 @@ body: | ; CHECK-LABEL: name: test_freeze_v5s16 ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]] - ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32), [[BITCAST2]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR3]](<5 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST3]](s32), [[LSHR]](s32), [[BITCAST4]](s32), [[LSHR1]](s32), [[BITCAST5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR]](<5 x s32>) %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:_(<5 x s16>) = G_TRUNC %0 %2:_(<5 x s16>) = G_FREEZE %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index d4ec463bd34f..ed51693e83a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fshl_s32_s32 @@ -11,37 +11,37 @@ body: | ; SI-LABEL: name: test_fshl_s32_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; SI: $vgpr0 = COPY 
[[FSHR1]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) ; VI-LABEL: name: test_fshl_s32_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; VI: $vgpr0 = COPY [[FSHR1]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) ; GFX9-LABEL: name: test_fshl_s32_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; GFX9: $vgpr0 = COPY [[FSHR1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[FSHR1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -57,61 +57,61 @@ body: | ; SI-LABEL: name: test_fshl_v2s32_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; SI: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; SI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; SI: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; SI: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; SI: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; SI-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] + ; SI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fshl_v2s32_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; VI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; VI: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; VI: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; VI: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; VI-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] + ; VI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fshl_v2s32_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; GFX9: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; GFX9: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; GFX9: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; GFX9-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] + ; GFX9-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = 
COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -127,66 +127,66 @@ body: | ; SI-LABEL: name: test_fshl_s16_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fshl_s16_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) 
- ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fshl_s16_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -206,115 +206,115 @@ body: | ; SI-LABEL: name: test_fshl_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) 
= COPY $vgpr2 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_fshl_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI: 
[[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C3]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-LABEL: name: 
test_fshl_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -330,55 +330,55 @@ body: | ; SI-LABEL: name: test_fshl_s64_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; 
SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] - ; SI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; VI-LABEL: name: test_fshl_s64_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] - ; VI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; GFX9-LABEL: name: test_fshl_s64_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -394,83 +394,83 @@ body: | ; SI-LABEL: name: test_fshl_s8_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND 
[[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshl_s8_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshl_s8_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -490,124 +490,124 @@ body: | ; SI-LABEL: name: test_fshl_s24_s24 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 
24 - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; SI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB 
[[SUB1]], [[COPY3]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshl_s24_s24 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; VI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + 
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshl_s24_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; 
GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; GFX9-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -627,235 +627,238 @@ body: | ; SI-LABEL: name: test_fshl_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; SI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; SI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC4]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC6]] - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; SI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT 
[[AND8]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL5]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC4]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC6]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] + ; SI-NEXT: 
[[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL5]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) ; VI-LABEL: name: test_fshl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; VI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; VI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; VI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) - ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR8]] - ; VI: 
[[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] - ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR8]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) ; GFX9-LABEL: name: test_fshl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[AND]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR3]], [[AND1]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR4]] - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[AND2]](<2 x s16>) - ; GFX9: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) - ; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32) - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR3]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR4]] + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -886,215 +889,215 @@ body: | ; SI-LABEL: name: test_fshl_v4s16_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; SI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) 
- ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] - ; SI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL5]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR5]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: 
[[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL5]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fshl_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = 
G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[LSHR6]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR7]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR9]] - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] - ; VI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C3]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND5]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR11]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]] - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[AND6]](s16) - ; VI: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C3]](s16) - ; VI: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND7]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR13]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[LSHR6]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR7]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR9]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C3]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND5]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR11]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[AND6]](s16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C3]](s16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND7]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR13]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fshl_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[AND]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[AND2]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x 
s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index b2a3886c7dd8..c2d2557cae8f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s 
-o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fshr_s32_s32 @@ -12,22 +12,22 @@ body: | ; SI-LABEL: name: test_fshr_s32_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; SI: $vgpr0 = COPY [[FSHR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; SI-NEXT: $vgpr0 = COPY [[FSHR]](s32) ; VI-LABEL: name: test_fshr_s32_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; VI: $vgpr0 = COPY [[FSHR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; VI-NEXT: $vgpr0 = COPY [[FSHR]](s32) ; GFX9-LABEL: name: test_fshr_s32_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; GFX9: $vgpr0 = COPY [[FSHR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[FSHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -43,37 +43,37 @@ body: | ; SI-LABEL: name: test_fshr_v2s32_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; SI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY 
$vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fshr_v2s32_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; VI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fshr_v2s32_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; GFX9: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], 
[[UV5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -89,65 +89,65 @@ body: | ; SI-LABEL: name: test_fshr_s16_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fshr_s16_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; VI: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fshr_s16_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -167,184 +167,184 @@ body: | ; SI-LABEL: name: test_fshr_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[OR]](s16) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; 
SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: 
[[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) ; VI-LABEL: name: test_fshr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI: 
[[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: 
[[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[AND]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -360,55 +360,55 @@ body: | ; SI-LABEL: name: test_fshr_s64_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; SI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; VI-LABEL: name: test_fshr_s64_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], 
[[TRUNC]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; VI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; GFX9-LABEL: name: test_fshr_s64_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -424,80 +424,80 @@ body: | ; SI-LABEL: name: test_fshr_s8_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshr_s8_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshr_s8_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL 
[[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -517,121 +517,121 @@ body: | ; SI-LABEL: name: test_fshr_s24_s24 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; SI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; 
SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshr_s24_s24 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; VI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB 
[[SUB1]], [[COPY3]] - ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshr_s24_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(uge), [[SUB1]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -651,425 +651,363 @@ body: | ; SI-LABEL: name: test_fshr_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; SI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; SI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], 
[[C6]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY7]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY8]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY9]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL6]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] - ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C5]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY12]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C4]] - ; SI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C5]] - ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY13]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT 
[[AND16]](s16) - ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL9]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT10]](s32) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY14]](s32) - ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) - ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) - ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[DEF]], [[ZEXT12]](s32) - ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[C2]], [[COPY15]](s32) - ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) - ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] - ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[ZEXT13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY16]](s32) - ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[COPY17]](s32) - ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[SHL14]] - ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; SI: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) - ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; SI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C4]] - ; SI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C5]] - ; SI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; SI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND26]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR8]](s16) - ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) 
- ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[SHL12]], [[C1]] - ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY21]](s32) - ; SI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND27]](s16) - ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]] - ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[ZEXT15]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C4]] - ; SI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C5]] - ; SI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; SI: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND30]](s16) - ; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR9]](s16) - ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) - ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND32:%[0-9]+]]:_(s32) = G_AND [[SHL13]], [[C1]] - ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY22]](s32) - ; SI: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND31]](s16) - ; SI: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]] - ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[ZEXT17]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] - ; SI: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; SI: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C]](s32) - ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]] - ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32) - ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]] - ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; SI: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32) - ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]] - ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x 
s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C6]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) + ; SI-NEXT: 
[[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL6]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C5]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C4]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C5]] + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY12]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT9]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; 
SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL11]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C4]] + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C5]] + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C1]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[ZEXT11]](s32) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL13]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C]](s32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL14]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] + ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]] + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL15]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>) ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; VI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; VI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND3]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR4]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND5]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND6]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR6]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL6]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] - ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C5]] - ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[OR2]], [[AND7]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL4]], [[C3]](s16) - ; VI: 
[[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[SHL7]], [[LSHR9]] - ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C4]] - ; VI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C5]] - ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[OR3]], [[AND9]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]] - ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) - ; VI: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR13]] - ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[AND13]](s16) - ; VI: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[DEF]], [[C3]](s16) - ; VI: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR15]] - ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[C3]](s16) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL14]] - ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; VI: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) - ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] - ; VI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] - ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[OR8]], [[AND15]](s16) - ; VI: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[SHL12]], [[C3]](s16) - ; VI: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[LSHR17]], [[AND16]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[SHL15]], [[LSHR18]] - ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C4]] - ; VI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C5]] - ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; VI: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[OR9]], [[AND17]](s16) - ; VI: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[SHL13]], [[C3]](s16) - ; VI: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[LSHR19]], [[AND18]](s16) - ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[SHL16]], [[LSHR20]] - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR13:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT2]], [[SHL17]] - ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) - ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]] - ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32) - ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]] - ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND 
[[C3]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND3]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND4]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND5]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND6]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR6]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL6]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C5]] + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[OR2]], [[AND7]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL4]], [[C3]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND8]](s16) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[SHL7]], [[LSHR9]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C4]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C5]] + ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[OR3]], [[AND9]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]] + ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR13]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL11]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]] + ; VI-NEXT: 
[[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND13]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C3]](s16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL12]], [[LSHR15]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] + ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]] + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL15]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[AND]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) - ; GFX9: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), 
[[BITCAST7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32) - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] + ; GFX9-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -1100,352 +1038,352 @@ body: | ; SI-LABEL: name: test_fshr_v4s16_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI: 
[[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) - ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] - ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) - ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) - ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] - ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] - ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) - ; SI: 
[[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; SI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; SI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; SI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; SI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; SI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) - ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) - ; SI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) - ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] - ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; SI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; SI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; SI: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND28]](s16) - ; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) - ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) - ; SI: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) - ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] - ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] - ; SI: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; SI: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]] - ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI-NEXT: 
[[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: 
[[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] + ; SI-NEXT: 
[[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] + ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) + ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) + ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) + ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) + ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] + ; SI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] + ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] + ; SI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] + ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND28]](s16) + ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) + ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) + ; SI-NEXT: 
[[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) + ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] + ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fshr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[XOR2:%[0-9]+]]:_(<2 x 
s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16) - ; VI: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C]](s16) - ; VI: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR15]] - ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16) - ; VI: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16) - ; VI: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR17]] - ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) - ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] - ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR 
[[UV5]], [[BITCAST9]] - ; VI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; VI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16) - ; VI: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16) - ; VI: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL13]], [[LSHR21]] - ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; VI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16) - ; VI: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16) - ; VI: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[SHL14]], [[LSHR23]] - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]] - ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], 
[[AND2]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C]](s16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], 
[[LSHR15]] + ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR17]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) + ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] + ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL13]], [[LSHR21]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] + ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] + ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[SHL14]], [[LSHR23]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fshr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[AND]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[AND2]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR1]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir index 717ad3bfd40a..0228f4d3e339 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 
-run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fsin_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir index f3f4695b8863..df0471000aa3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fsqrt_s32 @@ -12,16 +12,16 @@ body: | ; SI-LABEL: name: test_fsqrt_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; SI: $vgpr0 = COPY [[FSQRT]](s32) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FSQRT]](s32) ; VI-LABEL: name: test_fsqrt_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; VI: $vgpr0 = COPY [[FSQRT]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FSQRT]](s32) ; GFX9-LABEL: name: test_fsqrt_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; GFX9: $vgpr0 = COPY [[FSQRT]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FSQRT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FSQRT %0 $vgpr0 = COPY %1 @@ -35,16 +35,16 @@ body: | ; SI-LABEL: name: test_fsqrt_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] - ; SI: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) ; VI-LABEL: name: test_fsqrt_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) ; GFX9-LABEL: name: test_fsqrt_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 @@ -58,24 +58,24 @@ body: | ; SI-LABEL: name: test_fsqrt_s16 ; SI: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fsqrt_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fsqrt_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_FSQRT %1 @@ -91,25 +91,25 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fsqrt_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fsqrt_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 
x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 @@ -123,28 +123,28 @@ body: | ; SI-LABEL: name: test_fsqrt_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; SI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fsqrt_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fsqrt_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FSQRT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -158,25 +158,25 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] - ; SI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_fsqrt_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_fsqrt_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_FSQRT %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -190,51 +190,51 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fsqrt_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fsqrt_v2s16 ; GFX9: 
[[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FSQRT %0 $vgpr0 = COPY %1 @@ -247,64 +247,64 @@ body: | ; SI-LABEL: name: test_fsqrt_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fsqrt_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fsqrt_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSQRT %0 %2:_(<3 x s32>) = G_ANYEXT %1 @@ -319,92 +319,92 @@ body: | ; SI-LABEL: name: test_fsqrt_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FSQRT2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) 
- ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; VI-NEXT: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; GFX9-NEXT: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 6dbeab45e1cd..46980b399c4d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# 
RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fsub_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index be6d4baccd1f..7d6db970ad6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -9,7 +9,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s1) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -22,7 +22,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s7 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s7) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -35,7 +35,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s8 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s8) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -48,7 +48,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s16 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s16) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -61,7 +61,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s32 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s32) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -73,7 +73,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_48 ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) %0:_(s48) = G_IMPLICIT_DEF %1:_(s64) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 @@ -86,7 +86,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s64 ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) %0:_(s64) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... 
@@ -98,8 +98,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s65 ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[DEF]](s128) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[DEF]](s128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) %0:_(s65) = G_IMPLICIT_DEF %1:_(s96) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -112,7 +112,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s128 ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](s128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](s128) %0:_(s128) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -124,7 +124,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_256 ; CHECK: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](s256) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](s256) %0:_(s256) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 ... @@ -136,8 +136,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s448 ; CHECK: [[DEF:%[0-9]+]]:_(s448) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s448) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -150,8 +150,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s512 ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s512) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -164,8 +164,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1024 ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s1024) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -178,8 +178,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1056 ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) %0:_(s1056) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 @@ -192,8 +192,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s2048 ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1024) - ; CHECK: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) %0:_(s2048) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 @@ -206,7 +206,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) %0:_(<2 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -218,7 +218,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s32 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) %0:_(<3 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2 = COPY %0 ... @@ -230,7 +230,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) %0:_(<4 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -242,7 +242,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<5 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<5 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -254,7 +254,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v6s32 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<6 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<6 x s32>) %0:_(<6 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -266,7 +266,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v7s32 ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<7 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<7 x s32>) %0:_(<7 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -278,7 +278,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v8s32 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>) %0:_(<8 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 ... 
@@ -290,7 +290,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v16s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>) %0:_(<16 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 ... @@ -302,7 +302,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v17s32 ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<17 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>) %0:_(<17 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -314,7 +314,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v32s32 ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<32 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<32 x s32>) %0:_(<32 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -327,11 +327,11 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - ; CHECK: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) 
= G_UNMERGE_VALUES %0 %34:_(p1) = COPY $vgpr0_vgpr1 @@ -347,9 +347,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v64s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 S_NOP 0, implicit %0, implicit %1 @@ -362,7 +362,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(<2 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 @@ -375,7 +375,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s1 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) %0:_(<3 x s1>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -388,7 +388,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s8 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(<2 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 @@ -401,9 +401,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -416,7 +416,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s16 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) %0:_(<2 x s16>) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... 
@@ -428,11 +428,28 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF %2:_(<4 x s16>) = G_INSERT %1, %0, 0 @@ -446,7 +463,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<4 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<4 x s16>) %0:_(<4 x s16>) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... 
@@ -458,12 +475,42 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 @@ -477,9 +524,41 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v6s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[DEF]](<6 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 @@ -493,7 +572,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v8s16 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<8 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY [[DEF]](<8 x s16>) %0:_(<8 x s16>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -505,7 +584,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s64 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<2 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<2 x s64>) %0:_(<2 x s64>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -517,8 +596,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(<4 x s8>) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -530,7 +609,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p0 ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p0) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p0) %0:_(p0) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -542,7 +621,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p1 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p1) %0:_(p1) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -554,7 +633,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p2 ; CHECK: [[DEF:%[0-9]+]]:_(p2) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](p2) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p2) %0:_(p2) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -566,7 +645,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p3 ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](p3) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p3) %0:_(p3) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -578,7 +657,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p4 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p4) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p4) %0:_(p4) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -590,7 +669,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p5 ; CHECK: [[DEF:%[0-9]+]]:_(p5) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](p5) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p5) %0:_(p5) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... 
@@ -602,7 +681,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p999 ; CHECK: [[DEF:%[0-9]+]]:_(p999) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p999) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p999) %0:_(p999) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 @@ -615,8 +694,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s1024 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s1024>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<2 x s1024>) - ; CHECK: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<2 x s1024>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024) %0:_(<2 x s1024>) = G_IMPLICIT_DEF %1:_(s1024), %2:_(s1024) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -630,8 +709,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s1024 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1024>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024), [[UV2:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<3 x s1024>) - ; CHECK: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024), implicit [[UV2]](s1024) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024), [[UV2:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<3 x s1024>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024), implicit [[UV2]](s1024) %0:_(<3 x s1024>) = G_IMPLICIT_DEF %1:_(s1024), %2:_(s1024), %3:_(s1024) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index f5b89b7e9de9..1f037cc5775a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -10,8 +10,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = G_CONSTANT i32 0 @@ -28,8 +29,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_1_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = G_CONSTANT i32 1 @@ -107,8 +109,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v16s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<16 x s64>) = 
G_INSERT [[DEF]], [[COPY]](s64), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT]](<16 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<16 x s64>) = G_IMPLICIT_DEF %2:_(s32) = G_CONSTANT i32 0 @@ -126,8 +129,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s8) = G_CONSTANT i8 0 @@ -145,8 +149,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[DEF]], [[COPY]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(<2 x s8>) = G_IMPLICIT_DEF @@ -166,8 +171,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(s64) = G_CONSTANT i64 0 @@ -305,55 +311,62 @@ body: | ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = 
G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[C3]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>) - ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(<4 x s32>), [[UV33:%[0-9]+]]:_(<4 x s32>), [[UV34:%[0-9]+]]:_(<4 x s32>), [[UV35:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(<4 x s32>), [[UV37:%[0-9]+]]:_(<4 x s32>), [[UV38:%[0-9]+]]:_(<4 x s32>), [[UV39:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: G_STORE [[UV32]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), 
[[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV33]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV34]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV35]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV36]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV37]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV38]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV39]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into 
unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) - ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 1a8b37cf7d4c..b6e4c91dec08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -409,8 +409,9 @@ body: | ; CHECK-LABEL: name: test_insert_v2s32_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<2 x s32>) = G_INSERT %0, %1, 0 @@ -425,8 +426,9 @@ body: | ; CHECK-LABEL: name: test_insert_v2s32_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: 
[[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(s32) = COPY $vgpr2
 %2:_(<2 x s32>) = G_INSERT %0, %1, 32
@@ -441,8 +443,9 @@ body: |
 ; CHECK-LABEL: name: test_insert_v3s32_s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(s32) = COPY $vgpr3
 %2:_(<3 x s32>) = G_INSERT %0, %1, 0
@@ -457,8 +460,9 @@ body: |
 ; CHECK-LABEL: name: test_insert_v3s32_s32_offset32
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(s32) = COPY $vgpr3
 %2:_(<3 x s32>) = G_INSERT %0, %1, 32
@@ -473,8 +477,9 @@ body: |
 ; CHECK-LABEL: name: test_insert_v3s32_s32_offset64
 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(s32) = COPY $vgpr3
 %2:_(<3 x s32>) = G_INSERT %0, %1, 64
@@ -489,8 +494,9 @@ body: |
 ; CHECK-LABEL: name: test_insert_v4s32_s32_offset0
 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(s32) = COPY
$vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -505,8 +511,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -521,8 +528,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 64 @@ -537,8 +545,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset96 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 96 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 96 @@ -553,8 +562,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -569,8 +580,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset32 ; CHECK: 
[[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -585,8 +598,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 64 @@ -601,8 +616,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s96_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -617,8 +634,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s96_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -633,8 +652,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -649,8 +670,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -665,8 +688,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 64 @@ -681,8 +706,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 
x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -697,8 +724,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -763,11 +792,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -813,11 +844,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -834,16 
+865,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -861,16 +905,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; 
CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -888,16 +944,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -915,15 +983,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) 
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(<2 x s16>) = COPY $vgpr2 @@ -940,15 +1022,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(<2 x s16>) = COPY $vgpr2 @@ -965,15 +1061,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; 
CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -990,15 +1099,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s32_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; 
CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -1016,9 +1138,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1034,9 +1172,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1052,9 +1205,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1070,9 +1239,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset48 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 48 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1088,8 +1272,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 0 @@ -1104,8 +1305,26 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 16 @@ -1120,8 +1339,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 32 @@ -1136,9 +1372,27 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT 
[[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %1, 0 @@ -1154,9 +1408,26 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %1, 0 @@ -1172,8 +1443,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 0 @@ -1188,8 +1475,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; 
CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 16 @@ -1204,8 +1508,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir index 8a87eb1d0f68..1e6412fd8aa6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 
-run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_intrinsic_round_s32 @@ -12,49 +12,49 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX6: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX6: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX6: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX8-LABEL: name: test_intrinsic_round_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX8: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX8: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: $vgpr0 = COPY [[FADD]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] 
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-LABEL: name: test_intrinsic_round_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX9: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -68,49 +68,49 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s32_flags ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX6: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX6: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX6: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 1065353216 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX8-LABEL: name: test_intrinsic_round_s32_flags ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX8: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX8: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: $vgpr0 = COPY [[FADD]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-LABEL: name: test_intrinsic_round_s32_flags ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX9: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX9: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nsz G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -124,71 +124,71 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; GFX6: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; GFX6: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX6: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX6: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C10]] - ; GFX6: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] - ; GFX6: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[UV1]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C10]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) ; GFX8-LABEL: name: test_intrinsic_round_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; 
GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) ; GFX9-LABEL: name: test_intrinsic_round_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1 = COPY %1 @@ -202,79 +202,79 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX6: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX6: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX6: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX6: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], 
[[C2]] - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_intrinsic_round_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX8: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX8: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX8: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] - ; GFX8: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_intrinsic_round_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX9: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1 = COPY %1 @@ -288,115 +288,115 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX6: [[FNEG:%[0-9]+]]:_(s64) = 
G_FNEG [[SELECT1]] - ; GFX6: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX6: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX6: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C10]] - ; GFX6: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND3]](s32) - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; GFX6: [[AND4:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND4]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] - ; GFX6: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT4]] - ; GFX6: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX6: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX6: [[AND5:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C10]] - ; GFX6: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND5]] - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C9]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C8]] - ; GFX6: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], [[SELECT5]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 + ; GFX6-NEXT: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C10]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND3]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT4]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C10]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND5]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C9]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C8]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], [[SELECT5]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: test_intrinsic_round_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: 
[[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] - ; GFX8: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_intrinsic_round_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 
- ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] - ; GFX9: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -410,68 +410,68 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: 
[[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_intrinsic_round_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = 
G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_intrinsic_round_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; 
GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_INTRINSIC_ROUND %1 @@ -487,127 +487,127 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX6: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD 
[[FPEXT12]], [[FPEXT13]] - ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT 
[[SELECT1]](s16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 
-32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -620,216 +620,216 @@ body: | liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-LABEL: name: test_intrinsic_round_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = 
G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX6: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] - ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; GFX6: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] - ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; GFX6: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] - ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) - ; GFX6: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX6: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX6: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) - ; GFX6: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX6: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) - ; GFX6: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] - ; GFX6: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6: 
[[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; 
GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) + ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) + ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) + ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) + ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] + ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX8: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX8: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX8: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8: [[OR2:%[0-9]+]]:_(s16) = G_OR 
[[C4]], [[AND2]] - ; GFX8: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX8: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], 
[[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX8-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC 
[[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX9: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX9: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; 
GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) 
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_INTRINSIC_ROUND %1 @@ -846,232 +846,232 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX6: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6: 
[[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] - ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; GFX6: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] - ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; GFX6: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] - ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) - ; GFX6: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX6: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX6: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) - ; GFX6: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX6: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) - ; GFX6: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] - ; GFX6: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT21]] - ; GFX6: [[FPTRUNC9:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) - ; GFX6: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC9]] - ; GFX6: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) - ; GFX6: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FPEXT22]], [[FPEXT23]] - ; GFX6: [[FPTRUNC10:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD6]](s32) - ; GFX6: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC10]] - ; GFX6: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX6: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] - ; GFX6: [[FPEXT24:%[0-9]+]]:_(s32) = G_FPEXT [[FABS3]](s16) - ; GFX6: [[FPEXT25:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT24]](s32), [[FPEXT25]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] - ; GFX6: [[FPEXT26:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC9]](s16) - ; GFX6: [[FPEXT27:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT3]](s16) - ; GFX6: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FPEXT26]], [[FPEXT27]] - ; GFX6: [[FPTRUNC11:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC11]](s16) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX6: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 
x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: 
[[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) + ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) + ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) + ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) + ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] + ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) + ; GFX6-NEXT: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT21]] + ; GFX6-NEXT: [[FPTRUNC9:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) + ; GFX6-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC9]] + ; GFX6-NEXT: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) + ; GFX6-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FPEXT22]], [[FPEXT23]] + ; GFX6-NEXT: [[FPTRUNC10:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD6]](s32) + ; GFX6-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC10]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX6-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] + ; GFX6-NEXT: [[FPEXT24:%[0-9]+]]:_(s32) = G_FPEXT [[FABS3]](s16) + ; GFX6-NEXT: [[FPEXT25:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT24]](s32), [[FPEXT25]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] + ; GFX6-NEXT: [[FPEXT26:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC9]](s16) + ; GFX6-NEXT: [[FPEXT27:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT3]](s16) + ; GFX6-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FPEXT26]], [[FPEXT27]] + ; GFX6-NEXT: [[FPTRUNC11:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC11]](s16) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), 
[[BITCAST3]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX8: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX8: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX8: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX8: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX8: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX8: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] - ; GFX8: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] - ; GFX8: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX8: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX8: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] - ; GFX8: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] - ; GFX8: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], 
[[SELECT3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD7]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX8-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP 
floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] + ; GFX8-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] + ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] + ; GFX8-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] + ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD7]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX9: 
[[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX9: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX9: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX9: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] - ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] - ; GFX9: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX9: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] - ; GFX9: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] - ; GFX9: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] + ; GFX9-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] + ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] + ; GFX9-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] + ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir index 5d037703572a..eb5a4f959602 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_inttoptr_s64_to_p0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index c0bfde9e89a5..271b7b23380a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -1,46 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) - ; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) + ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) - ; PACKED: $vgpr0 = COPY [[ANYEXT]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) + ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; PACKED-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex } @@ -48,48 +50,50 @@ define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v2f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom 
"ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, 
<8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex } @@ -97,70 +101,73 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; 
PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -168,56 +175,58 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -225,46 +234,48 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[UV]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; 
UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[UV]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 @@ -275,54 +286,56 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v2f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 @@ -333,76 +346,79 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: G_STORE 
[[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) - ; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = 
COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -413,62 +429,64 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -479,20 +497,22 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNPACKED: $vgpr0 = COPY [[DEF]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[DEF]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex } @@ -500,47 +520,49 @@ define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 
%s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex } @@ -548,20 +570,22 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: $vgpr0 = COPY [[DEF]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[DEF]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex } @@ -569,70 +593,69 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - 
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -640,68 +663,66 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 
- ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -709,60 +730,56 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: 
[[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) 
= G_IMPLICIT_DEF - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -770,56 +787,58 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", 
align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -827,55 +846,57 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY 
[[DEF]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -883,52 +904,54 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x 
s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -936,24 +959,26 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: 
image_load_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -961,46 +986,48 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[UV]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[UV]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 @@ -1011,54 +1038,56 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; 
PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 @@ -1069,54 +1098,56 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 
- ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 @@ -1127,76 +1158,75 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; UNPACKED: bb.1 
(%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; 
UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 
x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -1207,75 +1237,73 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; 
UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 
x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 
1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -1286,75 +1314,73 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = 
COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -1365,62 +1391,64 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; 
PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -1431,61 +1459,63 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -1496,59 +1526,61 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x 
s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -1559,59 +1591,61 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x 
s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index 95bc6b2d00be..e605938b4b7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=GCN %s define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; GCN-LABEL: name: image_load_f32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index ce9ef775779b..862d88de3b5c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -1,86 +1,90 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX81 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX81 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { ; UNPACKED-LABEL: name: image_store_f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, 
$sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX81: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; 
GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x 
s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -88,83 +92,87 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) { ; UNPACKED-LABEL: name: image_store_v2f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE 
intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v2f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v2f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 
0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -172,130 +180,124 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) { ; UNPACKED-LABEL: name: image_store_v3f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) - ; UNPACKED: 
G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v3f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX81: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX81: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX81: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX81: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX81: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; GFX81: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX81: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX81: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX81: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; GFX81: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX81: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; GFX81: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX81: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX81: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; GFX81: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX81: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX81: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL1]] + ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; 
GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX10: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), 
[[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -303,93 +305,97 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) { ; UNPACKED-LABEL: name: image_store_v4f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) 
= COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v4f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = 
COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) - ; GFX81: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v4f16 ; 
GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) 
= G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index 307199ba0616..27b6aaed74a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -10,9 +10,9 @@ body: | ; GCN-LABEL: name: s_buffer_load_s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GCN: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) + ; GCN-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = 
COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -28,12 +28,11 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -49,13 +48,12 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3p3 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[UV]](<3 x s32>) - ; GCN: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -71,13 +69,12 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s16 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), 
[[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>) - ; GCN: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -93,12 +90,11 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<6 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -114,12 +110,11 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s64 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) - ; 
GCN: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s64>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -135,57 +130,54 @@ body: | ; GCN-LABEL: name: s_buffer_load_v12s8 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) - ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GCN: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) - ; GCN: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GCN: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GCN: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; GCN: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GCN: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GCN: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) - ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GCN: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]] - ; GCN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] - ; GCN: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GCN: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GCN: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GCN: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; GCN: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GCN: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GCN: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]] - ; GCN: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GCN: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GCN: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GCN: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GCN: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]] - ; GCN: 
[[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; GCN: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) - ; GCN: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GCN: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GCN: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]] - ; GCN: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; GCN: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; GCN: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; GCN: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GCN: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; GCN: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; GCN: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GCN: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; GCN: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GCN: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) + ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) + ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) + ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]] + ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; GCN-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]] + ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GCN-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]] + ; GCN-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; 
GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]] + ; GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] + ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GCN-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] + ; GCN-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] + ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -202,12 +194,11 @@ body: | ; GCN-LABEL: name: s_buffer_load_s96 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index ec638c5a3907..2bed0e0c85b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s --- name: test_load_constant32bit_s32_align1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index a95c47eb578c..067e66444ac6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -1160,16 +1160,10 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_constant_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1179,16 +1173,10 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x 
s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_constant_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1198,16 +1186,10 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4) @@ -3364,12 +3346,12 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3395,12 +3377,12 @@ body: | ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3426,12 +3408,12 @@ body: | ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -4935,24 +4917,27 @@ body: | ; CI-LABEL: name: test_load_constant_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; 
VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -4973,10 +4958,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) @@ -4984,10 +4969,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) @@ -4995,10 +4980,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5119,10 +5104,10 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) @@ -5230,10 +5215,10 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 
x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) @@ -5341,10 +5326,10 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index cd4dedba0910..7b047ddd27e3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_load_flat_s1_align1 @@ -1010,16 +1010,10 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1029,16 +1023,10 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: 
[[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_flat_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1048,16 +1036,10 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0) @@ -3510,12 +3492,12 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3541,12 +3523,12 @@ body: | ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 
x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3572,12 +3554,12 @@ body: | ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -4944,10 +4926,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) @@ -4955,10 +4937,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x 
s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) @@ -4966,10 +4948,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -4990,10 +4972,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) @@ -5001,10 +4983,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) @@ -5012,10 +4994,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5136,10 +5118,10 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) @@ -5247,10 +5229,10 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), 
[[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) @@ -5358,10 +5340,10 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index 70538d0097a7..acaf1dab33f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -1203,8 +1203,9 @@ body: | ; SI-LABEL: name: test_load_global_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1775,16 +1776,10 @@ body: | ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), 
[[DEF]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>) - ; SI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; CI-HSA-LABEL: name: test_load_global_s224_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1794,16 +1789,10 @@ body: | ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; CI-MESA-LABEL: name: test_load_global_s224_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1813,16 +1802,10 @@ body: | ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; 
CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_global_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1832,16 +1815,10 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), 
[[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-HSA-LABEL: name: test_load_global_s224_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1851,16 +1828,10 @@ body: | ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-MESA-LABEL: name: test_load_global_s224_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1870,16 +1841,10 @@ body: | ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = 
G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1) @@ -5837,12 +5802,12 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5868,12 +5833,12 @@ body: | ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5899,12 +5864,12 @@ body: | ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5930,12 +5895,12 @@ body: | ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5961,12 +5926,12 @@ body: | ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5979,12 +5944,12 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -6996,7 +6961,6 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7004,6 +6968,7 @@ body: | ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7029,7 +6994,6 @@ body: | ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7037,6 +7001,7 @@ body: | ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7062,7 +7027,6 @@ body: | ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7070,6 +7034,7 @@ body: | ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; 
CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7095,7 +7060,6 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7103,6 +7067,7 @@ body: | ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7128,7 +7093,6 @@ body: | ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7136,6 +7100,7 @@ body: | ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -7148,7 +7113,6 @@ body: | ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7156,6 +7120,7 @@ body: | ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -7233,21 +7198,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7271,21 +7236,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7309,21 +7274,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; 
VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7446,21 +7411,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7484,21 +7449,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-MESA-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7522,21 +7487,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7625,21 +7590,21 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7663,21 +7628,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7701,21 +7666,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7739,21 +7704,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; 
VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7864,21 +7829,21 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]] - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7902,21 +7867,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7962,21 +7927,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]] - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL6]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8022,21 +7987,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8132,8 +8097,9 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8621,7 +8587,6 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8631,6 +8596,7 @@ body: | ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8662,7 +8628,6 @@ body: | ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8672,6 +8637,7 @@ body: | ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8703,7 +8669,6 @@ body: | ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8713,6 +8678,7 @@ body: | ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: 
[[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8744,7 +8710,6 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8754,6 +8719,7 @@ body: | ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8785,7 +8751,6 @@ body: | ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8795,6 +8760,7 @@ body: | ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; GFX9-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -8809,7 +8775,6 @@ body: | ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8819,6 +8784,7 @@ body: | ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV2]](<2 x s16>) ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; GFX9-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -8871,26 +8837,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8921,26 +8887,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; 
CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8971,26 +8937,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; 
CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9021,26 +8987,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], 
[[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9153,26 +9119,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9203,26 +9169,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9253,26 +9219,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9303,26 +9269,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9435,26 +9401,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9485,26 +9451,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9535,26 +9501,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9585,26 +9551,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9747,26 +9713,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; SI-NEXT: 
[[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9797,26 +9763,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY 
[[BITCAST1]](<2 x s16>) @@ -9877,26 +9843,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9957,26 +9923,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; 
VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -10504,8 +10470,9 @@ body: | ; SI-LABEL: name: test_load_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) @@ -11453,45 +11420,51 @@ body: | ; SI-LABEL: name: test_load_global_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x 
s64>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; 
GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11512,10 +11485,10 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x 
s64>), align 8, addrspace 1) @@ -11523,10 +11496,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11534,10 +11507,10 @@ body: | ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11545,10 +11518,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), 
[[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11556,10 +11529,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11567,10 +11540,10 @@ body: | ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11691,10 +11664,10 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), 
[[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) @@ -11702,10 +11675,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -11813,10 +11786,10 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -11924,10 +11897,10 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = 
COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) @@ -11935,10 +11908,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -12046,10 +12019,10 @@ body: | ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14947,17 +14920,18 @@ body: | ; SI-LABEL: name: test_global_v2s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 17f6b68307ea..46eabff12de2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -7733,12 +7733,12 @@ body: | ; SI-LABEL: name: test_load_local_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7764,12 +7764,12 @@ body: | ; CI-LABEL: name: test_load_local_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7795,12 +7795,12 @@ body: | ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7826,12 +7826,12 @@ body: | ; VI-LABEL: name: test_load_local_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7857,12 +7857,12 @@ body: | ; GFX9-LABEL: name: test_load_local_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7875,12 +7875,12 @@ body: | ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7893,12 +7893,12 @@ body: | ; GFX10-LABEL: name: test_load_local_v3s16_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7911,12 +7911,12 @@ body: | ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -11741,10 +11741,10 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), 
[[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) @@ -11754,10 +11754,10 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11765,10 +11765,10 @@ body: | ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11776,10 +11776,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = 
G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11787,10 +11787,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11798,10 +11798,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11809,10 +11809,10 @@ body: | ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD 
[[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11820,10 +11820,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index c13629c8dc03..cca3ae38e94e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -7496,10 +7496,10 @@ body: | ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_private_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7519,10 +7519,10 @@ body: | ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_private_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7542,10 +7542,10 @@ body: | ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_private_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7565,10 +7565,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; 
GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index f61de3cc898c..cf4144166b51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_lshr_s32_s32 @@ -414,43 +414,40 @@ body: | ; SI-LABEL: name: test_lshr_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: 
[[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_lshr_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) 
= G_LSHR [[UV1]], [[UV5]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -593,17 +590,12 @@ body: | ; SI-LABEL: name: test_lshr_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) @@ -617,29 +609,26 @@ body: | ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], 
[[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -647,8 +636,7 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -658,49 +646,52 @@ body: | ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT 
[[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: 
[[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -807,32 +798,33 @@ body: | ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index 736f606fd1bd..60cdfdd685b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_or_s32 @@ -9,9 +9,9 @@ body: | ; CHECK-LABEL: name: test_or_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_OR %0, %1 @@ -26,9 +26,9 @@ body: | ; CHECK-LABEL: name: test_or_s1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: S_NOP 0, implicit [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_CONSTANT i32 0 @@ -46,22 +46,22 @@ body: | ; CHECK-LABEL: name: test_or_v2s1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -80,26 +80,26 @@ body: | ; CHECK-LABEL: name: test_or_v3s1 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP3]] - ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP4]] - ; CHECK: [[OR2:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[ICMP5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP 
intpred(ne), [[UV2]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -118,9 +118,9 @@ body: | ; CHECK-LABEL: name: test_or_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_OR %0, %1 @@ -135,16 +135,16 @@ body: | ; CHECK-LABEL: name: test_or_s96 ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s96) = G_OR %0, %1 @@ -159,13 +159,13 @@ body: | ; CHECK-LABEL: name: test_or_128 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(s128) = G_OR %0, %1 @@ -180,9 +180,9 @@ body: | ; CHECK-LABEL: name: test_or_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -200,9 +200,9 @@ body: | ; CHECK-LABEL: name: test_or_s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -220,12 +220,12 @@ body: | ; CHECK-LABEL: name: test_or_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -243,12 +243,12 @@ body: | ; CHECK-LABEL: name: test_or_s24 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -266,9 +266,9 @@ body: | ; CHECK-LABEL: name: test_or_s48 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %0 @@ -286,9 +286,9 @@ body: | ; CHECK-LABEL: name: test_or_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_OR %0, %1 @@ -303,20 +303,16 @@ body: | ; CHECK-LABEL: name: test_or_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV5]] + 
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_OR %0, %1 @@ -331,13 +327,13 @@ body: | ; CHECK-LABEL: name: test_or_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV2]] - ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV1]], [[UV3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = G_OR %0, %1 @@ -351,25 +347,22 @@ body: | ; CHECK-LABEL: name: test_or_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK: [[UV10:%[0-9]+]]:_(<5 x s32>), 
[[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_OR %0, %1 @@ -386,13 +379,13 @@ body: | ; CHECK-LABEL: name: test_or_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_OR %0, %1 @@ -407,9 +400,9 @@ body: | ; CHECK-LABEL: name: test_or_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_OR %0, %1 @@ -423,41 +416,66 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_or_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]] - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: 
[[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -477,9 +495,9 @@ body: | ; CHECK-LABEL: name: test_or_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](<4 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_OR %0, %1 @@ -493,54 +511,102 @@ body: | ; CHECK-LABEL: name: test_or_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = 
G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] - ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = 
COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR9]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_OR %0, %1 @@ -555,34 +621,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = 
G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_OR %0, %1 @@ -597,15 +644,15 @@ body: | ; CHECK-LABEL: name: test_or_v4s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = 
G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 23b7328027af..5be7981d5c72 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -8,22 +8,26 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY]](s32), %bb.0, [[ADD]](s32), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY]](s32), %bb.0, [[ADD]](s32), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -54,34 +58,38 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BITCAST2]](<2 x s16>), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](<2 x s16>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BITCAST2]](<2 x s16>), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](<2 x s16>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -112,81 +120,85 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v3s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: 
[[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF2]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C5]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; CHECK: 
[[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]] - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32) + ; CHECK-NEXT: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C7]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C7]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -221,48 +233,51 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v4s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) 
- ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -293,26 +308,30 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s32>) = G_PHI [[COPY]](<2 x s32>), %bb.0, [[BUILD_VECTOR]](<2 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](<2 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s32>) = G_PHI [[COPY]](<2 x s32>), %bb.0, [[BUILD_VECTOR]](<2 x s32>), %bb.1 + ; 
CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<2 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -343,27 +362,31 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v3s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV3]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<3 x s32>) = G_PHI [[COPY]](<3 x s32>), %bb.0, [[BUILD_VECTOR]](<3 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[PHI]](<3 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s32>) = G_PHI [[COPY]](<3 x s32>), %bb.0, [[BUILD_VECTOR]](<3 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PHI]](<3 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 @@ -394,28 +417,32 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v4s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: 
G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV4]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV6]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[COPY]](<4 x s32>), %bb.0, [[BUILD_VECTOR]](<4 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<4 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[COPY]](<4 x s32>), %bb.0, [[BUILD_VECTOR]](<4 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<4 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -446,32 +473,36 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v8s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<8 x s32>) = G_PHI [[COPY]](<8 x s32>), %bb.0, [[BUILD_VECTOR]](<8 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<8 x s32>) = G_PHI [[COPY]](<8 x s32>), %bb.0, [[BUILD_VECTOR]](<8 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 @@ -502,39 +533,43 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: 
test_phi_v16s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -565,55 +600,59 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v32s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), 
[[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] - ; CHECK: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] - ; CHECK: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] - ; CHECK: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] - ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] - ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] - ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] - ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] - ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] - ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] - ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] - ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] - ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] - ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] - ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] - ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] - ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), 
[[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] 
+ ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -644,100 +683,104 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v64s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK: 
[[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] - ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] - ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] - ; CHECK: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] - ; CHECK: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; CHECK: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR 
[[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), 
[[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD 
[[UV13]], [[UV77]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] + ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] + ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), 
[[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -768,26 +811,30 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s64 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[COPY]](s64), %bb.0, [[MV]](s64), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](s64) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[COPY]](s64), %bb.0, [[MV]](s64), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](s64) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -818,34 +865,38 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s64 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s64>) = G_PHI [[COPY]](<2 x s64>), %bb.0, [[BUILD_VECTOR]](<2 x s64>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<2 x s64>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s64>) = G_PHI [[COPY]](<2 x s64>), %bb.0, [[BUILD_VECTOR]](<2 x s64>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<2 x s64>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -876,42 +927,47 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v3s64 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; CHECK: [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV4]](s64) - ; CHECK: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV10]], [[UV12]] - ; CHECK: 
[[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV11]], [[UV13]], [[UADDO3]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; CHECK: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV5]](s64) - ; CHECK: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV14]], [[UV16]] - ; CHECK: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV15]], [[UV17]], [[UADDO5]] - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[EXTRACT]](<3 x s64>), %bb.0, [[BUILD_VECTOR]](<3 x s64>), %bb.1 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[PHI]](<3 x s64>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; CHECK-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 @@ -945,23 +1001,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p3 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p3) = G_PHI [[COPY]](p3), %bb.0, [[PTR_ADD]](p3), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](p3) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p3) = G_PHI [[COPY]](p3), %bb.0, [[PTR_ADD]](p3), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](p3) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -993,23 +1053,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p5 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C1]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p5) = G_PHI [[COPY]](p5), %bb.0, [[PTR_ADD]](p5), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](p5) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p5) = G_PHI [[COPY]](p5), %bb.0, [[PTR_ADD]](p5), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](p5) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1041,23 +1105,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p0 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[PTR_ADD]](p0), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p0) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[PTR_ADD]](p0), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p0) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1089,23 +1157,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p1) = G_PHI [[COPY]](p1), %bb.0, [[PTR_ADD]](p1), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p1) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p1) = G_PHI [[COPY]](p1), %bb.0, [[PTR_ADD]](p1), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p1) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1137,23 +1209,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p4 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p4) = G_PHI [[COPY]](p4), %bb.0, [[PTR_ADD]](p4), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p4) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p4) = G_PHI [[COPY]](p4), %bb.0, [[PTR_ADD]](p4), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p4) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1185,22 +1261,26 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p9999 ; CHECK: bb.0: - ; CHECK: successors: 
%bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p9999) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(p9999) = G_IMPLICIT_DEF - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p9999) = G_PHI [[COPY]](p9999), %bb.0, [[DEF]](p9999), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p9999) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p9999) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p9999) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p9999) = G_PHI [[COPY]](p9999), %bb.0, [[DEF]](p9999), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p9999) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1231,24 +1311,28 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[DEF]](s1), %bb.1 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[DEF]](s1), %bb.1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: 
S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1281,27 +1365,31 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s7 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1334,27 +1422,31 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s8 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; 
CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1387,24 +1479,28 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[DEF]](s16), %bb.1 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[DEF]](s16), %bb.1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1437,28 +1533,32 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s128 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; CHECK: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s128) = G_PHI [[COPY]](s128), %bb.0, [[MV]](s128), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](s128) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s128) = G_PHI [[COPY]](s128), %bb.0, [[MV]](s128), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](s128) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -1489,32 +1589,36 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s256 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), 
[[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV8]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV9]], [[UADDO1]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV10]], [[UADDE1]] - ; CHECK: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV11]], [[UADDE3]] - ; CHECK: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV4]], [[UV12]], [[UADDE5]] - ; CHECK: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV13]], [[UADDE7]] - ; CHECK: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV6]], [[UV14]], [[UADDE9]] - ; CHECK: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV15]], [[UADDE11]] - ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32), [[UADDE6]](s32), [[UADDE8]](s32), [[UADDE10]](s32), [[UADDE12]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s256) = G_PHI [[COPY]](s256), %bb.0, [[MV]](s256), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](s256) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV8]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV9]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV10]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV11]], [[UADDE3]] + ; CHECK-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV4]], [[UV12]], [[UADDE5]] + ; CHECK-NEXT: [[UADDE8:%[0-9]+]]:_(s32), 
[[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV13]], [[UADDE7]] + ; CHECK-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV6]], [[UV14]], [[UADDE9]] + ; CHECK-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV15]], [[UADDE11]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32), [[UADDE6]](s32), [[UADDE8]](s32), [[UADDE10]](s32), [[UADDE12]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s256) = G_PHI [[COPY]](s256), %bb.0, [[MV]](s256), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](s256) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 @@ -1545,54 +1649,58 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]] - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1 - ; CHECK: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: 
[[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]] + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: 
%bb.1, %bb.2 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir index 6c17acdde8e2..da1491884bfd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_saddo_s7 @@ -9,21 +9,21 @@ body: | ; CHECK-LABEL: name: test_saddo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 7 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 7 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -43,18 +43,18 @@ body: | ; CHECK-LABEL: name: test_saddo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: 
$vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -74,15 +74,15 @@ body: | ; CHECK-LABEL: name: test_saddo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_SADDO %0, %1 @@ -99,19 +99,19 @@ body: | ; CHECK-LABEL: name: test_saddo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), 
[[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_SADDO %0, %1 @@ -128,46 +128,46 @@ body: | ; CHECK-LABEL: name: test_saddo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: 
$vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_SADDO %0, %1 @@ -183,80 +183,80 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_saddo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 
65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: 
[[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: 
[[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -277,84 +277,84 @@ body: | ; CHECK-LABEL: name: test_saddo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: 
[[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD3]], 16 - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] - ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 - ; CHECK: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] - ; CHECK: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD3]], 16 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: 
[[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_SADDO %0, %1 @@ -371,29 +371,29 @@ body: | ; CHECK-LABEL: name: test_saddo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[UV4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD1]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV6]](s32), [[C]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV7]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD1]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV6]](s32), [[C]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV7]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 0330579b6144..e39849ad6dd7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: saddsat_s7 @@ -12,55 +12,55 @@ body: | ; GFX6-LABEL: name: saddsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; 
GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: saddsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; 
GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -78,55 +78,55 @@ body: | ; GFX6-LABEL: name: saddsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: 
[[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: saddsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) 
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -144,113 +144,113 @@ body: | ; GFX6-LABEL: name: saddsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32) - ; GFX6: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = 
G_SUB [[C2]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: saddsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN2]] - ; GFX8: 
[[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ADD1]], [[C1]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ADD1]], [[C1]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 
x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -271,47 +271,47 @@ body: | ; GFX6-LABEL: name: saddsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: saddsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: 
[[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -329,82 +329,82 @@ body: | ; GFX6-LABEL: name: saddsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: saddsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; 
GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: saddsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SADDSAT %0, %1 @@ -419,169 +419,163 @@ body: | ; GFX6-LABEL: name: saddsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = 
COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: 
[[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] + ; GFX6-NEXT: 
[[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; 
GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC3]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SADDSAT %1, %2 @@ -598,148 +592,148 @@ body: | ; GFX6-LABEL: name: saddsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: 
[[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX6]] - ; GFX6: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN6]] - ; GFX6: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB7]], [[SHL7]] - ; GFX6: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB6]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[SMIN7]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ADD3]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX6]] + ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN6]] + ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB7]], [[SHL7]] + ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB6]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[SMIN7]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ADD3]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: saddsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC4]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC6]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX6]] - ; GFX8: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] - ; GFX8: 
[[SUB7:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN6]] - ; GFX8: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB7]], [[TRUNC7]] - ; GFX8: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB6]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[SMIN7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; 
GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC6]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX6]] + ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN6]] + ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB7]], [[TRUNC7]] + ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB6]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[SMIN7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: saddsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[UV2]] - ; GFX9: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x s16>), [[SADDSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x s16>), [[SADDSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SADDSAT %0, %1 @@ -754,37 +748,37 @@ body: | ; GFX6-LABEL: name: saddsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] - ; GFX6: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GFX8-LABEL: name: saddsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] - ; GFX8: $vgpr0 = COPY [[ADD]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] + ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GFX9-LABEL: name: saddsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SADDSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SADDSAT %0, %1 @@ -799,61 +793,61 @@ body: | ; GFX6-LABEL: name: saddsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: saddsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX8: 
[[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: saddsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] - ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY 
$vgpr2_vgpr3 %2:_(<2 x s32>) = G_SADDSAT %0, %1 @@ -868,70 +862,70 @@ body: | ; GFX6-LABEL: name: saddsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: saddsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: saddsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SADDSAT %0, %1 @@ -946,124 +940,124 @@ body: | ; GFX6-LABEL: name: saddsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX6: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: saddsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX8: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), 
[[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: saddsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], 
[[MV]] - ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX9: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; GFX9-NEXT: 
[[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SADDSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir index 88d6a71d1383..b0afa6227800 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s ... 
--- name: test_sbfx_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index 3b969e94d5ba..b2e80985ddf5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_sdiv_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index cac4b9b9864e..fa1488e87ffa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -260,35 +260,23 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]] 
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 @@ -384,40 +372,65 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; 
CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: 
[[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C2]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s32) = COPY $vgpr6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index 93224e57d07b..5529b114ac7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- 
name: test_sext_inreg_s32_1 @@ -705,10 +705,10 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir index 09064e3dec7d..088d586cae03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -109,10 +109,7 @@ body: | ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index b406ed427ab0..a1a14756cf6f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_shl_s32_s32 @@ -405,43 +405,40 @@ body: | ; SI-LABEL: name: test_shl_v3s64_v3s32 
; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_shl_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_shl_v3s64_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -664,32 +661,33 @@ body: | ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 
32 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 8dc47c4e362d..8bbde77b4269 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -35,9 +35,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -59,9 +61,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -83,9 +87,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -131,8 +137,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY 
$vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -154,9 +161,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -178,10 +186,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32), [[EXTRACT2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -203,9 +214,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; 
CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -293,50 +306,29 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; CHECK-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR2]](<3 x s32>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST6]](s32), [[LSHR3]](s32), [[BITCAST7]](s32) - ; CHECK-NEXT: 
[[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR3]](<3 x s32>), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index e5a21d494c4c..faff7290f6d5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -862,66 +862,48 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), 
[[LSHR]](s32), [[BITCAST1]](s32) - ; GFX8-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX8-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX8-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX8-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX8-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT2]](s32), [[EXTRACT3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT4]](s32), [[EXTRACT5]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -948,12 +930,14 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + 
; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] @@ -968,12 +952,14 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -997,12 +983,14 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -1016,12 +1004,14 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[UV]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index 70633234f553..ce0afe0b50cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smax_s32 @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_smax_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[SMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; GFX9-LABEL: name: test_smax_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SMAX %0, %1 @@ -39,22 +39,22 
@@ body: | ; SI-LABEL: name: test_smax_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_smax_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_smax_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SMAX %0, %1 @@ -69,27 +69,27 @@ body: | ; SI-LABEL: name: test_smax_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smax_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -107,34 +107,34 @@ body: | ; SI-LABEL: name: test_smax_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smax_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX9-NEXT: 
[[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -152,25 +152,25 @@ body: | ; SI-LABEL: name: test_smax_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; VI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; VI: $vgpr0 = COPY [[SMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; GFX9-LABEL: name: test_smax_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: $vgpr0 = COPY [[SMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -188,31 +188,31 @@ body: | ; SI-LABEL: name: test_smax_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX 
[[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_smax_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; VI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_smax_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SMAX %0, %1 @@ -227,34 +227,34 @@ body: | ; SI-LABEL: name: test_smax_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; VI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; VI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smax_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; GFX9: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] + ; GFX9-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_SMAX %0, %1 @@ -269,50 +269,50 @@ body: 
| ; SI-LABEL: name: test_smax_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_smax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_smax_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SMAX %0, %1 @@ -327,73 +327,77 @@ body: | ; SI-LABEL: name: test_smax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] - ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMAX %0, %1 @@ -409,90 +413,90 @@ body: | ; SI-LABEL: name: test_smax_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG6]], [[SEXT_INREG7]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMAX2]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMAX3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; SI-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG6]], [[SEXT_INREG7]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMAX2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMAX3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x 
s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC4]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] - ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] - ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; 
VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index c693402b7005..08df7e169734 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# 
RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smin_s32 @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_smin_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[SMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; GFX9-LABEL: name: test_smin_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SMIN %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_smin_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_smin_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_smin_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SMIN %0, %1 @@ -69,27 +69,27 @@ body: | ; SI-LABEL: name: test_smin_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smin_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -107,34 +107,34 @@ body: | ; SI-LABEL: name: test_smin_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: 
[[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smin_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -152,25 +152,25 @@ body: | ; SI-LABEL: name: test_smin_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; VI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; VI: $vgpr0 = COPY [[SMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 
17 + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; GFX9-LABEL: name: test_smin_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: $vgpr0 = COPY [[SMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -188,31 +188,31 @@ body: | ; SI-LABEL: name: test_smin_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_smin_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; VI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_smin_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY1]](<2 x s32>) - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SMIN %0, %1 @@ -227,34 +227,34 @@ body: | ; SI-LABEL: name: test_smin_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; VI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; VI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] + ; VI-NEXT: 
[[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smin_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; GFX9: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] + ; GFX9-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_SMIN %0, %1 @@ -269,50 +269,50 @@ body: | ; SI-LABEL: name: test_smin_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_smin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_smin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; 
GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SMIN %0, %1 @@ -327,73 +327,77 @@ body: | ; SI-LABEL: name: test_smin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; 
VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] - ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = 
G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = 
G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMIN %0, %1 @@ -409,90 +413,90 @@ body: | ; SI-LABEL: name: test_smin_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG6]], [[SEXT_INREG7]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMIN2]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMIN3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; SI-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG6]], [[SEXT_INREG7]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMIN2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMIN3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC4]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] - ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] - ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] - ; VI: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 
x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index cc23f0e64e56..841aac4304bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_srem_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 9ad2e5c30258..ddfa0e0669c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# 
RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: sshlsat_s7 @@ -12,64 +12,64 @@ body: | ; GFX6-LABEL: name: sshlsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) ; GFX8-LABEL: name: sshlsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT 
[[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), 
[[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -87,64 +87,64 @@ body: | ; GFX6-LABEL: name: sshlsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) ; GFX8-LABEL: name: sshlsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX8: 
[[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; 
GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -162,121 +162,121 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C5]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C5]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; 
GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C5]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: sshlsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX9: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX9: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), 
[[SHL]](s16), [[C5]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -297,54 +297,54 @@ body: | ; GFX6-LABEL: name: sshlsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) ; GFX8-LABEL: name: sshlsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 
-32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -362,104 +362,104 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], 
[[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: sshlsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8: 
[[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: sshlsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9: 
[[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SSHLSAT %0, %1 
@@ -474,183 +474,177 @@ body: | ; GFX6-LABEL: name: sshlsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] - ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; 
GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] + ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) 
= G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT 
[[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST5]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: 
[[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST4]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SSHLSAT %1, %2 @@ -667,188 +661,188 @@ body: | ; GFX6-LABEL: name: sshlsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = 
G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] - ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32) - ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]] - ; GFX6: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] + ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) + ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]] + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]] + ; GFX6-NEXT: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: sshlsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY 
$vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) - ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT7]](s16) - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; 
GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT7]](s16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: sshlsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX9: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX9: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) - ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SSHLSAT %0, %1 @@ -863,43 +857,43 @@ body: | ; GFX6-LABEL: name: sshlsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; 
GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) ; GFX8-LABEL: name: sshlsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) ; GFX9-LABEL: name: sshlsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SSHLSAT %0, %1 @@ -914,70 +908,70 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] + ; 
GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: sshlsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x 
s32>) ; GFX9-LABEL: name: sshlsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SSHLSAT %0, %1 @@ -992,46 +986,46 @@ body: | ; GFX6-LABEL: name: sshlsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX6: 
[[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) ; GFX8-LABEL: name: sshlsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) ; GFX9-LABEL: name: sshlsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX9: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SSHLSAT %0, %1 @@ -1046,76 +1040,76 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; 
GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: sshlsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) 
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: sshlsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], 
[[TRUNC]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SSHLSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir index 348838dbdac7..8a4f380c02ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_ssubo_s7 @@ -9,21 +9,21 @@ body: | ; CHECK-LABEL: name: test_ssubo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 7 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 7 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] 
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -43,18 +43,18 @@ body: | ; CHECK-LABEL: name: test_ssubo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[SUB]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -74,15 +74,15 @@ body: | ; CHECK-LABEL: name: test_ssubo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[SUB]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; CHECK-NEXT: $vgpr1 = COPY 
[[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_SSUBO %0, %1 @@ -99,19 +99,19 @@ body: | ; CHECK-LABEL: name: test_ssubo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_SSUBO %0, %1 @@ -128,46 +128,46 @@ body: | ; CHECK-LABEL: name: test_ssubo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST1]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), 
[[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: 
[[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_SSUBO %0, %1 @@ -183,80 +183,80 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_ssubo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; 
CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: 
[[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV13]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -277,84 +277,84 @@ body: | ; CHECK-LABEL: name: test_ssubo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 
- ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB3]], 16 - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] - ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 - ; CHECK: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] - ; CHECK: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] 
+ ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB3]], 16 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_SSUBO %0, %1 @@ -371,29 +371,29 @@ body: | ; CHECK-LABEL: name: test_ssubo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; 
CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[UV4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV6]](s32), [[C]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV7]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV6]](s32), [[C]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV7]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SSUBO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 4fdea4bcf996..d6ae62a28a93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -1,8 
+1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: ssubsat_s7 @@ -12,55 +12,55 @@ body: | ; GFX6-LABEL: name: ssubsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: ssubsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -78,55 +78,55 @@ body: | ; GFX6-LABEL: name: ssubsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: ssubsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; 
GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -144,113 +144,113 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C2]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C3]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C3]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: 
[[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C1]](s32) - ; GFX6: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C2]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C3]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C3]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C1]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: ssubsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], 
[[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C2]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C3]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C3]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SUB5]], [[C1]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C2]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C3]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = 
G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C3]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SUB5]], [[C1]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SSUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SSUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -271,47 +271,47 @@ body: | ; GFX6-LABEL: name: ssubsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: ssubsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C1]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -329,82 +329,82 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; 
GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: ssubsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: 
[[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: ssubsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SSUBSAT %0, %1 @@ -419,169 +419,163 @@ body: | ; GFX6-LABEL: name: ssubsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; 
GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC3]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 
x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SSUBSAT %1, %2 @@ -598,148 +592,148 @@ body: | ; GFX6-LABEL: name: ssubsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; 
GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] - ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[SMAX6]], [[C1]] - ; GFX6: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] - ; GFX6: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[SMIN6]], [[C2]] - ; GFX6: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB9]], [[SHL7]] - ; GFX6: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB10]] - ; GFX6: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[SMIN7]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SUB11]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; 
GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] + ; 
GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[SMAX6]], [[C1]] + ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[SMIN6]], [[C2]] + ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB9]], [[SHL7]] + ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB10]] + ; GFX6-NEXT: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[SMIN7]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SUB11]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: ssubsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - 
; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC4]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC6]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] - ; GFX8: [[SUB9:%[0-9]+]]:_(s16) = G_SUB [[SMAX6]], [[C1]] - ; GFX8: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] - ; GFX8: [[SUB10:%[0-9]+]]:_(s16) = G_SUB [[SMIN6]], [[C2]] - ; GFX8: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB9]], [[TRUNC7]] - ; GFX8: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB10]] - ; GFX8: [[SUB11:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[SMIN7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB11]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC6]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s16) = G_SUB [[SMAX6]], [[C1]] + ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB10:%[0-9]+]]:_(s16) = G_SUB [[SMIN6]], [[C2]] + ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB9]], [[TRUNC7]] + ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB10]] + ; GFX8-NEXT: [[SUB11:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[SMIN7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB11]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: ssubsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[UV2]] - ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x s16>), [[SSUBSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x s16>), [[SSUBSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SSUBSAT %0, %1 @@ -754,37 +748,37 @@ body: | ; GFX6-LABEL: name: ssubsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] - ; GFX6: $vgpr0 = COPY [[SUB2]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB2]](s32) ; GFX8-LABEL: name: ssubsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] - ; GFX8: $vgpr0 = COPY [[SUB2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB2]](s32) ; GFX9-LABEL: name: ssubsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SSUBSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SSUBSAT %0, %1 @@ -799,61 +793,61 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN 
[[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: ssubsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: 
[[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: ssubsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] - ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SSUBSAT %0, %1 @@ -868,70 +862,70 @@ body: | ; GFX6-LABEL: name: ssubsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), 
[[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: ssubsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX8-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: ssubsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 
-9223372036854775808 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SSUBSAT %0, %1 @@ -946,124 +940,124 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX6: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; 
GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT 
[[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: ssubsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX8: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: ssubsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), 
[[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX9: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), 
[[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SSUBSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index c64f0951965d..d4f19a54f59c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -1,16 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' %s 2> %t.err -o - | FileCheck -check-prefix=SI %s -# RUN: FileCheck -check-prefix=ERR %s < %t.err -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 
-run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' 2> %t.err %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: FileCheck -check-prefix=ERR %s < %t.err -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s - - -# ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<132 x s16>) = G_CONCAT_VECTORS {{.*}} (in function: test_store_global_v11s16_align4) -# ERR-NOT: remark +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_store_global_s1_align1 @@ -2388,9 +2381,7 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -2429,7 +2420,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -2523,9 +2514,7 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -2542,7 +2531,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR1]](s32), 
[[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) @@ -2594,12 +2583,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2626,12 +2615,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2658,12 +2647,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), 
[[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -4091,9 +4080,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4132,7 +4119,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4230,9 +4217,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4249,7 +4234,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], 
[[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) @@ -4305,12 +4290,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4341,12 +4326,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4377,12 +4362,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4763,9 +4748,7 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4838,7 +4821,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4856,18 +4839,16 @@ body: | ; CI-LABEL: name: test_store_global_v5s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4940,7 +4921,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -4958,12 +4939,12 @@ body: | ; GFX9-LABEL: name: test_store_global_v5s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1) @@ -4978,9 +4959,7 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5011,7 +4990,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: 
[[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5019,18 +4998,16 @@ body: | ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5061,7 +5038,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5069,12 +5046,12 @@ body: | ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x 
s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1) @@ -5089,39 +5066,39 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1) @@ -5136,39 +5113,39 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), 
[[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into 
unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1) @@ -5183,39 +5160,39 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1) @@ -5230,9 +5207,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5305,7 +5280,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5324,19 +5299,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = 
G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5409,7 +5382,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -5428,12 +5401,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into 
unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 1, addrspace 1) @@ -5449,9 +5422,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5482,7 +5453,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5491,19 +5462,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5534,7 +5503,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5543,12 +5512,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1) @@ -5564,42 +5533,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), 
[[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1) @@ -5615,42 +5584,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: 
[[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1) @@ -5666,42 +5635,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), 
[[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1) @@ -5717,42 +5686,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<10 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1) @@ -5767,159 +5736,182 @@ body: | ; SI-LABEL: name: test_store_global_v11s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), 
[[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; SI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; 
SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; SI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; 
SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; SI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; SI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; SI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v11s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), 
[[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; CI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; CI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; CI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; CI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, 
addrspace 1) + ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; CI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; CI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v11s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x 
s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; VI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + 
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; VI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; VI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v11s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), 
[[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 
x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-NEXT: G_STORE [[BITCAST6]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; GFX9-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; GFX9-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<11 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1) @@ -5935,42 +5927,46 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x 
s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<12 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1) @@ -5986,9 +5982,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6061,7 +6055,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6080,19 +6074,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6165,7 +6157,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -6184,12 +6176,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 1, addrspace 1) @@ -6205,9 +6197,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6238,7 +6228,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -6247,19 +6237,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6290,7 +6278,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -6299,12 +6287,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), 
[[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 2, addrspace 1) @@ -6320,42 +6308,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 4, addrspace 1) @@ -6371,42 +6359,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; 
GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 8, addrspace 1) @@ -6422,42 +6410,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x 
s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 16, addrspace 1) @@ -7797,11 +7785,9 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = 
COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -7874,7 +7860,6 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) @@ -7940,7 +7925,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7960,27 +7945,26 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, 
addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -8053,7 +8037,6 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) @@ -8119,7 +8102,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) @@ -8139,17 +8122,18 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8169,11 +8153,9 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -8204,7 +8186,6 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) @@ -8230,7 +8211,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; SI-NEXT: 
[[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) @@ -8240,27 +8221,26 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -8291,7 +8271,6 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) @@ -8317,7 +8296,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) @@ -8327,17 +8306,18 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, 
align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8357,65 +8337,69 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - 
; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8435,65 +8419,69 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; 
CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8513,65 +8501,69 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 778d6932f8b0..2b562df4f134 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -152,12 +152,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -473,12 +473,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_96 ; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index 7a2012cf9d27..88cd74b29361 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck %s --- name: test_trunc_s64_to_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir index 3401b2857fb2..3eb5179c89c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_uadde_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir index 9aafa0da26f6..b057afa4f0e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_uaddo_s32 @@ -9,11 +9,11 @@ body: | ; CHECK-LABEL: name: test_uaddo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; CHECK: $vgpr0 = COPY [[UADDO]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_UADDO %0, %1 @@ -30,17 +30,17 @@ body: | ; CHECK-LABEL: name: test_uaddo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -61,17 +61,17 @@ body: | ; CHECK-LABEL: name: test_uaddo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -92,16 +92,16 @@ body: | ; CHECK-LABEL: name: test_uaddo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_UADDO %0, %1 @@ -118,37 +118,37 @@ body: | ; CHECK-LABEL: name: test_uaddo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; 
CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_UADDO %0, %1 @@ -164,67 +164,67 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_uaddo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC6]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC7]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC6]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC7]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC8]] + 
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -245,66 +245,66 @@ body: | ; CHECK-LABEL: name: test_uaddo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC8]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC9]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC10]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD3]](s16), [[TRUNC11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC8]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC9]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC10]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD3]](s16), [[TRUNC11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: 
[[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_UADDO %0, %1 @@ -321,23 +321,23 @@ body: | ; CHECK-LABEL: name: test_uaddo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s32), [[UV4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s32), [[UV5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s32), [[UV5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 
x s1>) = G_UADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index c12643df1c6b..9c210beb285a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: uaddsat_s7 @@ -12,40 +12,40 @@ body: | ; GFX6-LABEL: name: uaddsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: uaddsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], 
[[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -63,40 +63,40 @@ body: | ; GFX6-LABEL: name: uaddsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: uaddsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX8: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -114,88 +114,88 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C2]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - 
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C2]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: uaddsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], 
[[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY 
[[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -216,32 +216,32 @@ body: | ; GFX6-LABEL: name: uaddsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: uaddsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; 
GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -259,57 +259,57 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: uaddsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC2]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: uaddsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UADDSAT %0, %1 @@ -324,134 +324,128 @@ body: | ; GFX6-LABEL: name: uaddsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND 
[[LSHR5]], [[C2]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], 
[[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] - ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL 
[[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - 
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), 
[[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_UADDSAT %1, %2 @@ -468,103 +462,103 @@ body: | ; GFX6-LABEL: name: uaddsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = 
G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SHL6]], [[C1]] - ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: 
[[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SHL6]], [[C1]] + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: uaddsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC4]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC5]] - ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC6]] - ; GFX8: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC3]], [[TRUNC7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT3]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: uaddsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 
x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[UV2]] - ; GFX9: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x s16>), [[UADDSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x s16>), [[UADDSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UADDSAT %0, %1 @@ -579,22 +573,22 @@ body: | ; GFX6-LABEL: name: uaddsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] - ; GFX6: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GFX8-LABEL: name: uaddsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) ; GFX9-LABEL: name: uaddsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UADDSAT %0, %1 @@ -609,36 +603,36 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[C]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[UV2]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UMIN]] - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[C]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[UV3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], 
[[UMIN1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[C]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[UV2]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UMIN]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[C]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[UV3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UMIN1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: uaddsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: uaddsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] - ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = 
G_UADDSAT %0, %1 @@ -653,40 +647,40 @@ body: | ; GFX6-LABEL: name: uaddsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: uaddsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: uaddsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) 
- ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UADDSAT %0, %1 @@ -701,70 +695,70 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), 
[[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: uaddsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + 
; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: uaddsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UADDSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir index 25136c1db223..f63455446f24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s ... 
--- name: test_ubfx_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index e350fca51c26..83976e15d820 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_umax_s32 @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_umax_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[UMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; GFX9-LABEL: name: test_umax_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UMAX %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_umax_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_umax_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; VI: 
[[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_umax_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UMAX %0, %1 @@ -69,28 +69,28 @@ body: | ; SI-LABEL: name: test_umax_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umax_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -108,34 +108,34 @@ body: | ; SI-LABEL: name: test_umax_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umax_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -153,28 +153,28 @@ body: | ; SI-LABEL: name: test_umax_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; VI: $vgpr0 = COPY [[UMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; GFX9-LABEL: name: test_umax_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; GFX9: $vgpr0 = COPY [[UMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -192,31 +192,31 @@ body: | ; SI-LABEL: name: test_umax_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_umax_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; VI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umax_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UMAX %0, %1 @@ -231,34 +231,34 @@ body: | ; SI-LABEL: name: test_umax_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umax_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; VI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; VI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umax_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; GFX9: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] + ; GFX9-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_UMAX %0, %1 @@ -273,50 +273,50 @@ body: | ; SI-LABEL: name: test_umax_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_umax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[UMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_umax_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UMAX %0, %1 @@ -331,74 +331,78 @@ body: | ; SI-LABEL: name: test_umax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] - ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; 
GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMAX %0, %1 @@ -414,90 +418,90 @@ body: | ; SI-LABEL: name: test_umax_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] - ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] - ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMAX2]], [[C1]] - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMAX3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 
x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMAX2]], [[C1]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMAX3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR3]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC4]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] - ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] - ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = 
COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index c0a2192a7945..d08f57f96123 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_umin_s32 @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_umin_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[UMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN 
[[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; GFX9-LABEL: name: test_umin_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UMIN %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_umin_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_umin_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_umin_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UMIN %0, %1 @@ -69,28 +69,28 @@ body: | ; SI-LABEL: name: test_umin_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; 
VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umin_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -108,34 +108,34 @@ body: | ; SI-LABEL: name: test_umin_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umin_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: 
[[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -153,28 +153,28 @@ body: | ; SI-LABEL: name: test_umin_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; VI: $vgpr0 = COPY [[UMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; GFX9-LABEL: name: test_umin_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; GFX9: $vgpr0 = COPY [[UMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -192,31 +192,31 @@ body: | ; SI-LABEL: name: test_umin_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; SI: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_umin_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; VI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umin_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UMIN %0, %1 @@ -231,34 +231,34 @@ body: | ; SI-LABEL: name: test_umin_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; 
SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umin_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; VI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; VI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umin_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; GFX9: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; 
GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] + ; GFX9-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_UMIN %0, %1 @@ -273,50 +273,50 @@ body: | ; SI-LABEL: name: test_umin_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_umin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_umin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UMIN %0, %1 @@ -331,74 +331,78 @@ body: | ; SI-LABEL: name: test_umin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] - ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x 
s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMIN %0, %1 @@ -414,90 +418,90 @@ body: | ; SI-LABEL: name: test_umin_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], 
[[AND5]] - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] - ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMIN2]], [[C1]] - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMIN3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMIN2]], [[C1]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMIN3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: 
name: test_umin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC4]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] - ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] - ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir index 3c66fb4e4d60..eac097ae0a26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -10,14 +10,14 @@ body: | ; GFX8-LABEL: name: test_umulh_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[UMULH]](s32) ; 
GFX9-LABEL: name: test_umulh_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMULH]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMULH]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UMULH %0, %1 @@ -32,22 +32,22 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulh_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UMULH %0, %1 @@ -62,60 +62,60 @@ body: | ; GFX8-LABEL: name: test_umulh_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: 
[[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_umulh_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 
%1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UMULH %0, %1 @@ -130,114 +130,114 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] - ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; 
GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH 
[[UV8]], [[UV11]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_umulh_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL3]], [[MUL4]] - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] - ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; 
GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UMULH %0, %1 @@ -252,26 +252,26 @@ body: | ; GFX8-LABEL: name: test_umulh_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) ; GFX9-LABEL: name: test_umulh_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -289,34 +289,34 @@ body: | ; GFX8-LABEL: name: test_umulh_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) ; GFX9-LABEL: name: test_umulh_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX9: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -333,52 +333,52 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-LABEL: name: test_umulh_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; 
GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulh_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s16>) = G_TRUNC %0 @@ -395,117 
+395,101 @@ body: | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-LABEL: name: test_umulh_v3s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX8: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16) - ; GFX8: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] - ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX8: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C]] - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX8: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] 
+ ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16) + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C]] + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_umulh_v3s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C1]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; 
GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) - ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; GFX9: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) - ; GFX9: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR1]](<2 x s16>) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL]] - ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[DEF1]](s32) - ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX9: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: 
[[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -534,66 +518,66 @@ body: | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-LABEL: name: test_umulh_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX8: 
[[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] - ; GFX8: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umulh_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C2]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], 
[[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] - ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT4]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -618,125 +602,125 @@ body: | liveins: $vgpr0, $vgpr1 ; GFX8-LABEL: name: test_umulh_v4s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C4]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C4]](s16) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX8: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX8: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C4]](s16) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]] - ; GFX8: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[MUL3]], [[C4]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C5]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C5]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], 
[[C2]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C4]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C4]](s16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX8-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C4]](s16) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]] + ; GFX8-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[MUL3]], [[C4]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C5]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C5]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_umulh_v4s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) - ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) - ; GFX9: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[LSHR7:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR6]](<2 x s16>) - ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR7]](<2 x s16>) - ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) - ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], 
[[C2]](s32) - ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR7]](<2 x s16>) + ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index 3ed9e0cd6afb..84c4eec55f2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: ushlsat_s7 @@ -12,52 +12,52 @@ body: | ; GFX6-LABEL: name: ushlsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; 
GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) ; GFX8-LABEL: name: ushlsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], 
[[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -75,52 +75,52 @@ body: | ; GFX6-LABEL: name: ushlsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) ; GFX8-LABEL: name: ushlsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT 
[[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -138,103 +138,103 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: ushlsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX8: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL 
[[TRUNC2]], [[C2]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX9: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR 
[[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -255,42 +255,42 @@ body: | ; GFX6-LABEL: name: ushlsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) ; GFX8-LABEL: name: ushlsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] - ; GFX8: 
[[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -308,86 +308,86 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: ushlsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(ne), [[TRUNC]](s16), [[LSHR2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: ushlsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] - ; GFX9: 
[[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_USHLSAT %0, %1 @@ -402,159 +402,153 @@ body: | ; GFX6-LABEL: name: ushlsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] - ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - 
; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 
x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST5]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST4]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_USHLSAT %1, %2 @@ -571,158 +565,158 @@ body: | ; GFX6-LABEL: name: ushlsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; 
GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] - ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] - ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; 
GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) + ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] + ; GFX6-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: ushlsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX8: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX8: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + 
; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: ushlsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX9: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX9: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = 
G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_USHLSAT %0, %1 @@ -737,31 +731,31 @@ body: | ; GFX6-LABEL: name: ushlsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: $vgpr0 = COPY [[SELECT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX8-LABEL: name: ushlsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX9-LABEL: name: ushlsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), 
[[C]], [[SHL]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_USHLSAT %0, %1 @@ -776,52 +770,52 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: ushlsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x 
s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: ushlsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_USHLSAT %0, %1 @@ -836,34 +830,34 @@ body: | ; GFX6-LABEL: name: ushlsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY 
$vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: ushlsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: ushlsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_USHLSAT %0, %1 @@ -878,58 +872,58 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: ushlsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR 
[[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: ushlsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], 
[[TRUNC1]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_USHLSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir index bd4cfd255870..818e990bbd1b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_usube_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir index 9512db51141a..1099626dd503 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_usubo_s32 @@ -9,11 +9,11 @@ body: | ; CHECK-LABEL: name: test_usubo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO1]](s1) - ; CHECK: $vgpr0 = COPY [[USUBO]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO1]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[USUBO]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_USUBO %0, %1 @@ -30,17 +30,17 @@ body: | ; CHECK-LABEL: name: test_usubo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] + ; 
CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -61,17 +61,17 @@ body: | ; CHECK-LABEL: name: test_usubo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -92,16 +92,16 @@ body: | ; CHECK-LABEL: name: test_usubo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) 
= COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_USUBO %0, %1 @@ -118,41 +118,41 @@ body: | ; CHECK-LABEL: name: test_usubo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC4]](s16), [[TRUNC6]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC5]](s16), [[TRUNC7]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; 
CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC4]](s16), [[TRUNC6]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC5]](s16), [[TRUNC7]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_USUBO %0, %1 @@ -168,74 +168,74 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_usubo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC3]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC4]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: 
[[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s16), [[TRUNC9]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC7]](s16), [[TRUNC10]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; 
CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC3]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC4]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s16), [[TRUNC9]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC7]](s16), [[TRUNC10]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], 
[[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -256,75 +256,75 @@ body: | ; CHECK-LABEL: name: test_usubo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC4]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC5]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC6]] - ; CHECK: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[TRUNC7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) 
= G_ZEXT [[SUB3]](s16) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC12]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC9]](s16), [[TRUNC13]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC10]](s16), [[TRUNC14]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC11]](s16), [[TRUNC15]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB3]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC12]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC9]](s16), [[TRUNC13]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC10]](s16), [[TRUNC14]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), 
[[TRUNC11]](s16), [[TRUNC15]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_USUBO %0, %1 @@ -341,24 +341,24 @@ body: | ; CHECK-LABEL: name: test_usubo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV4]](s32), [[UV6]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV5]](s32), [[UV7]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV4]](s32), [[UV6]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV5]](s32), [[UV7]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_USUBO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 23558fb72c06..fe8966843e35 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: usubsat_s7 @@ -12,38 +12,38 @@ body: | ; GFX6-LABEL: name: usubsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: usubsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -61,38 +61,38 @@ body: | ; GFX6-LABEL: name: usubsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: usubsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX8: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -110,85 +110,85 @@ body: | ; GFX6-LABEL: name: usubsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: usubsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) 
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[USUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[USUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -209,30 +209,30 @@ body: | ; GFX6-LABEL: name: usubsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: usubsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -250,54 +250,54 @@ body: | ; GFX6-LABEL: name: usubsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: usubsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC2]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: usubsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_USUBSAT %0, %1 @@ -312,130 +312,124 @@ body: | ; GFX6-LABEL: name: usubsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x 
s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] - ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + 
; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x 
s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_USUBSAT %1, %2 @@ -452,98 +446,98 @@ body: | ; GFX6-LABEL: name: usubsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x 
s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: usubsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC4]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC5]] - ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC6]] - ; GFX8: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC3]], [[TRUNC7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT3]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; 
GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: usubsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[UV2]] - ; GFX9: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x s16>), [[USUBSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x s16>), [[USUBSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_USUBSAT %0, %1 @@ -558,20 +552,20 @@ body: | ; GFX6-LABEL: name: usubsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] - ; GFX6: $vgpr0 = COPY [[SUB]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) ; GFX8-LABEL: name: usubsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = 
G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) ; GFX9-LABEL: name: usubsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_USUBSAT %0, %1 @@ -586,33 +580,33 @@ body: | ; GFX6-LABEL: name: usubsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UMIN]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UMIN]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: usubsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: usubsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] - ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_USUBSAT %0, %1 @@ -627,40 +621,40 @@ body: | ; GFX6-LABEL: name: usubsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: usubsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: usubsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_USUBSAT %0, %1 @@ -675,70 +669,70 @@ body: | ; GFX6-LABEL: name: usubsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), 
[[C]], [[MV]] - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: usubsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: 
[[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: usubsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] - ; GFX9: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_USUBSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir new file mode 100644 index 000000000000..075e09d18df6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir @@ -0,0 +1,355 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s + +--- | + + define <2 x i16> @and_v2i16(<2 x i16> %a, <2 x i16> %b) #0 { + %and = and <2 x i16> %a, %b + ret <2 x i16> %and + } + + define <3 x i16> @add_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %add = add <3 x 
i16> %a, %b + ret <3 x i16> %add + } + + define <3 x i16> @shl_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %shl = shl <3 x i16> %a, %b + ret <3 x i16> %shl + } + + define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { + %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) + ret <4 x half> %fma + } + + define amdgpu_ps <5 x half> @maxnum_v5i16(<5 x half> %a, <5 x half> %b) { + %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b) + ret <5 x half> %fma + } + + declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) + declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>) +... + +--- +name: and_v2i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: and_v2i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[TRUNC1]] + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[LSHR]](s32) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(<2 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32) + %0:_(<2 x s16>) = G_TRUNC %5(<2 x s32>) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32) + %1:_(<2 x s16>) = G_TRUNC %8(<2 x s32>) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %9:_(<2 x s16>) = G_AND %0, %1 + %13:_(s16), %14:_(s16) = G_UNMERGE_VALUES %9(<2 x s16>) + %11:_(s32) = G_ANYEXT %13(s16) + %12:_(s32) = G_ANYEXT %14(s16) + $vgpr0 = COPY %11(s32) + $vgpr1 = COPY %12(s32) + %10:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %10, implicit $vgpr0, implicit $vgpr1 + +... 
+ +--- +name: add_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: add_v3i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY3]] + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY4]] + ; GFX7-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] + ; GFX7-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[ADD2]](s32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(s32) = COPY $vgpr2 + %6:_(<3 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32) + %0:_(<3 x s16>) = G_TRUNC %6(<3 x s32>) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %1:_(<3 x s16>) = G_TRUNC %10(<3 x s32>) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %11:_(<3 x s16>) = G_ADD %0, %1 + %16:_(s16), %17:_(s16), %18:_(s16) = G_UNMERGE_VALUES %11(<3 x s16>) + %13:_(s32) = G_ANYEXT %16(s16) + %14:_(s32) = G_ANYEXT %17(s16) + %15:_(s32) = G_ANYEXT %18(s16) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + $vgpr2 = COPY %15(s32) + %12:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... 
+ +--- +name: shl_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: shl_v3i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[AND1]](s32) + ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND2]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[SHL1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[SHL2]](s32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(s32) = COPY $vgpr2 + %6:_(<3 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32) + %0:_(<3 x s16>) = G_TRUNC %6(<3 x s32>) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %1:_(<3 x s16>) = G_TRUNC %10(<3 x s32>) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %11:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) + %16:_(s16), %17:_(s16), %18:_(s16) = G_UNMERGE_VALUES %11(<3 x s16>) + %13:_(s32) = G_ANYEXT %16(s16) + %14:_(s32) = G_ANYEXT %17(s16) + %15:_(s32) = G_ANYEXT %18(s16) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + $vgpr2 = COPY %15(s32) + %12:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... 
+ +--- +name: fma_v4f16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: fma_v4f16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX7-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX7-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; GFX7-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) + ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX7-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX7-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; GFX7-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] + ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX7-NEXT: $vgpr3 = 
COPY [[ANYEXT3]](s32) + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %0:_(<4 x s16>) = G_TRUNC %8(<4 x s32>) + %9:_(s32) = COPY $vgpr4 + %10:_(s32) = COPY $vgpr5 + %11:_(s32) = COPY $vgpr6 + %12:_(s32) = COPY $vgpr7 + %13:_(<4 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32), %11(s32), %12(s32) + %1:_(<4 x s16>) = G_TRUNC %13(<4 x s32>) + %14:_(s32) = COPY $vgpr8 + %15:_(s32) = COPY $vgpr9 + %16:_(s32) = COPY $vgpr10 + %17:_(s32) = COPY $vgpr11 + %18:_(<4 x s32>) = G_BUILD_VECTOR %14(s32), %15(s32), %16(s32), %17(s32) + %2:_(<4 x s16>) = G_TRUNC %18(<4 x s32>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %19:_(<4 x s16>) = G_FMA %0, %1, %2 + %25:_(s16), %26:_(s16), %27:_(s16), %28:_(s16) = G_UNMERGE_VALUES %19(<4 x s16>) + %21:_(s32) = G_ANYEXT %25(s16) + %22:_(s32) = G_ANYEXT %26(s16) + %23:_(s32) = G_ANYEXT %27(s16) + %24:_(s32) = G_ANYEXT %28(s16) + $vgpr0 = COPY %21(s32) + $vgpr1 = COPY %22(s32) + $vgpr2 = COPY %23(s32) + $vgpr3 = COPY %24(s32) + %20:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %20, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +... + +--- +name: maxnum_v5i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + + ; GFX7-LABEL: name: maxnum_v5i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], 
[[FPEXT5]] + ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) + ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) + ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT8]], [[FPEXT9]] + ; GFX7-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE4]](s32) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) + ; GFX7-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC4]](s16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX7-NEXT: $vgpr3 = COPY [[ANYEXT3]](s32) + ; GFX7-NEXT: $vgpr4 = COPY [[ANYEXT4]](s32) + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %6:_(s32) = COPY $vgpr4 + %7:_(<5 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32), %5(s32), %6(s32) + %0:_(<5 x s16>) = G_TRUNC %7(<5 x s32>) + %8:_(s32) = COPY $vgpr5 + %9:_(s32) = COPY $vgpr6 + %10:_(s32) = COPY $vgpr7 + %11:_(s32) = COPY $vgpr8 + %12:_(s32) = COPY $vgpr9 + %13:_(<5 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32), %12(s32) + %1:_(<5 x s16>) = G_TRUNC %13(<5 x s32>) + %15:_(<5 x s16>) = G_FMAXNUM %0, %1 + %21:_(s16), %22:_(s16), %23:_(s16), %24:_(s16), %25:_(s16) = G_UNMERGE_VALUES %15(<5 x s16>) + %16:_(s32) = G_ANYEXT %21(s16) + %17:_(s32) = G_ANYEXT %22(s16) + %18:_(s32) = G_ANYEXT %23(s16) + %19:_(s32) = G_ANYEXT %24(s16) + %20:_(s32) = G_ANYEXT %25(s16) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + $vgpr3 = COPY %19(s32) + $vgpr4 = COPY %20(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir new file mode 100644 index 000000000000..7e00c75a2a51 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir @@ -0,0 +1,477 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s + +--- | + + define <2 x i16> @and_v2i16(<2 x i16> %a, <2 x i16> %b) #0 { + %and = and <2 x i16> %a, %b + ret <2 x i16> %and + } + + define <3 x i16> @add_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %add = add <3 x i16> %a, %b + ret <3 x i16> %add + } + + define <3 x i16> @shl_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %shl = shl <3 x i16> %a, %b + ret <3 x i16> %shl + } + + define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { + %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) + ret <4 x half> %fma + } + + define amdgpu_ps <5 x half> @maxnum_v5i16(<5 x half> %a, <5 x half> %b) { + %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b) + ret <5 x half> %fma + } + + declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) + declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>) +... + +--- +name: and_v2i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: and_v2i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-LABEL: name: and_v2i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %3:_(<2 x s16>) = G_AND %0, %1 + $vgpr0 = COPY %3(<2 x s16>) + %4:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %4, implicit $vgpr0 +... 
+ +--- +name: add_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: add_v3i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-LABEL: name: add_v3i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), 
[[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ADD]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %3:_(<2 x s16>) = COPY $vgpr0 + %4:_(<2 x s16>) = COPY $vgpr1 + %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) + %10:_(<2 x s16>) = COPY $vgpr2 + %11:_(<2 x s16>) = COPY $vgpr3 + %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) + %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %17:_(<3 x s16>) = G_ADD %0, %1 + %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) + %24:_(s16) = G_IMPLICIT_DEF + %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) + %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + %18:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1 +... 
+ +--- +name: shl_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: shl_v3i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-LABEL: name: shl_v3i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %3:_(<2 x s16>) = COPY $vgpr0 + %4:_(<2 x s16>) = COPY $vgpr1 + %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) + %10:_(<2 x s16>) = COPY $vgpr2 + %11:_(<2 x s16>) = COPY $vgpr3 + %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) + %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %17:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) + %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) + %24:_(s16) = G_IMPLICIT_DEF + %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) + %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + %18:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1 +... 
+ +--- +name: fma_v4f16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: fma_v4f16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX8-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] + ; GFX8-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] + ; GFX8-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] + ; GFX8-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit 
$vgpr1 + ; GFX9-LABEL: name: fma_v4f16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY2]], [[COPY4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY1]], [[COPY3]], [[COPY5]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(<4 x s16>) = G_FMA %0, %1, %2 + %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %10(<4 x s16>) + $vgpr0 = COPY %12(<2 x s16>) + $vgpr1 = COPY %13(<2 x s16>) + %11:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %11, implicit $vgpr0, implicit $vgpr1 +... + +--- +name: maxnum_v5i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX8-LABEL: name: maxnum_v5i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX8-NEXT: 
[[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX8-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; GFX8-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] + ; GFX8-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX8-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; GFX8-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] + ; GFX8-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; GFX8-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; GFX8-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC8]] + ; GFX8-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]] + ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-LABEL: name: maxnum_v5i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC5]] + ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST8]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %2:_(<2 x s16>) = COPY $vgpr0 + %3:_(<2 x s16>) = COPY $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr2 + %5:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16) = G_UNMERGE_VALUES %5(<6 x s16>) + %0:_(<5 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16), %9(s16), %10(s16) + %12:_(<2 x s16>) = COPY $vgpr3 + %13:_(<2 x s16>) = COPY $vgpr4 + %14:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %12(<2 x s16>), %13(<2 x s16>), %14(<2 x s16>) + %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16), %20:_(s16), %21:_(s16) = G_UNMERGE_VALUES %15(<6 x s16>) + %1:_(<5 
x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16), %20(s16) + %23:_(<5 x s16>) = G_FMAXNUM %0, %1 + %27:_(s16), %28:_(s16), %29:_(s16), %30:_(s16), %31:_(s16) = G_UNMERGE_VALUES %23(<5 x s16>) + %32:_(s16) = G_IMPLICIT_DEF + %33:_(<6 x s16>) = G_BUILD_VECTOR %27(s16), %28(s16), %29(s16), %30(s16), %31(s16), %32(s16) + %24:_(<2 x s16>), %25:_(<2 x s16>), %26:_(<2 x s16>) = G_UNMERGE_VALUES %33(<6 x s16>) + $vgpr0 = COPY %24(<2 x s16>) + $vgpr1 = COPY %25(<2 x s16>) + $vgpr2 = COPY %26(<2 x s16>) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index a0a2e3ee8303..e954104c2f70 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_xor_s32 @@ -305,18 +305,14 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[XOR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_XOR %0, %1 @@ -353,23 +349,20 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_XOR %0, %1 @@ -424,40 +417,65 @@ body: | ; CHECK-LABEL: name: test_xor_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = 
G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; 
CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -492,54 +510,102 @@ body: | ; CHECK-LABEL: name: test_xor_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]] - ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: 
[[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_XOR %0, %1 @@ -554,34 +620,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = 
G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_XOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir index 5cac99355652..b62013d28e81 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -142,10 +142,7 @@ body: | ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -707,12 +704,6 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32) ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) @@ -737,28 +728,16 @@ body: | ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]] ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C3]] - ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]] - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]] - ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C3]] - ; CHECK-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C3]] - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]] + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32) - ; CHECK-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; CHECK-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C3]] - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]] - ; CHECK-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C3]] - ; CHECK-NEXT: 
[[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]] - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32) - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]] - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32) - ; CHECK-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C3]] - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]] - ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR11]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR12]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]] + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32) ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384) ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll index 47c4ac2aa042..88b3244df5fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll @@ -509,13 +509,10 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff -; GFX8-UNPACKED-NEXT: s_and_b32 s1, s0, s0 -; GFX8-UNPACKED-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, s0, v1 -; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v1 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v2 +; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-UNPACKED-NEXT: ; return to shader part epilog ; @@ -530,12 +527,11 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 -; GFX8-PACKED-NEXT: s_mov_b32 s0, 0xffff -; GFX8-PACKED-NEXT: s_and_b32 s0, s0, s0 -; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16 -; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-PACKED-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: load_1d_v3f16_xyz: @@ -552,7 +548,10 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg 
%rsrc, i32 %s) { ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0 +; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: load_1d_v3f16_xyz: @@ -565,11 +564,15 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX10-NEXT: s_mov_b32 s5, s7 ; GFX10-NEXT: s_mov_b32 s6, s8 ; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2 ; GFX10-NEXT: ; return to shader part epilog %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 36ae4e61ab3c..b93c0102f171 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -9,46 +9,49 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; 
GFX7-LABEL: name: s_buffer_load_i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: 
(dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val } @@ -56,46 +59,49 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_i32_glc ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_glc ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_glc ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1) ret i32 %val } @@ -103,61 +109,64 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v2i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX7-LABEL: name: s_buffer_load_v2i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; 
GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX8-LABEL: name: s_buffer_load_v2i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = 
COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val } @@ -165,85 +174,79 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v3i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: 
(dereferenceable invariant load (s96), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val } @@ -251,133 +254,136 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v8i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX6: $sgpr1 = COPY 
[[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX7-LABEL: name: s_buffer_load_v8i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX7: $sgpr4 = COPY 
[[V_READFIRSTLANE_B32_4]] - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY 
[[COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX8-LABEL: name: s_buffer_load_v8i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8: 
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret 
<8 x i32> %val } @@ -385,229 +391,232 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v16i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX6: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX6: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX6: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX6: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX6: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX6: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX6: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX6: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX6: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX6: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX6: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX6: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX6: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX6: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX6: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX6: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX6: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX6: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX6: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX6: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX6: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX6: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX6: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX6: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX7-LABEL: name: s_buffer_load_v16i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX7: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: 
[[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX7: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX7: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX7: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX7: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX7: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX7: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX7: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX7: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX7: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX7: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX7: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX7: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX7: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX7: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX7: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX7: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX7: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX7: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX7: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX7: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX7: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX7: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX7: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; 
GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX8-LABEL: name: s_buffer_load_v16i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX8: 
[[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX8: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX8: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX8: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX8: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX8: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX8: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX8: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX8: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX8: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX8: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX8: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX8: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX8: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX8: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX8: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX8: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX8: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX8: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX8: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX8: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX8: $sgpr14 = COPY 
[[V_READFIRSTLANE_B32_14]] - ; GFX8: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX8: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX8: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val } @@ -615,45 +624,48 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG 
implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0) ret i32 %val } @@ -661,43 +673,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM 
[[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1) ret i32 %val } @@ -705,45 +720,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_255 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 
= COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0) ret i32 %val } @@ -751,43 +769,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_256 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0) ret i32 %val } @@ -795,43 +816,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 
; GFX7-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY 
[[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0) ret i32 %val } @@ -839,45 +863,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0) ret i32 %val } @@ -885,44 +912,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; 
GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0) ret i32 %val } @@ -930,45 +960,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit 
$sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0) ret i32 %val } @@ -976,46 +1009,49 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) ret i32 %load } @@ -1023,45 +1059,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) ret i32 %load } @@ -1069,45 +1108,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; GFX6-NEXT: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
$sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) ret i32 %load } @@ -1115,45 +1157,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 
:: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) ret i32 %load } @@ -1161,45 +1206,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; 
GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) ret i32 %load } @@ -1207,45 +1255,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; 
GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) ret i32 %load } @@ -1253,45 +1304,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) ret i32 %load } @@ -1299,45 +1353,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; 
GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) ret i32 %load } @@ -1345,45 +1402,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX6: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) ret i32 %load } @@ -1391,44 +1451,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = 
S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) ret i32 %load } @@ -1436,45 +1499,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) ret i32 %load } @@ -1483,43 +1549,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -1527,52 +1596,55 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = 
COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val } @@ -1580,73 +1652,64 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val } @@ -1654,64 +1717,67 @@ define amdgpu_ps <3 x float> 
@s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val } @@ -1719,94 +1785,97 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: 
$vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = 
REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val } @@ -1814,148 +1883,151 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; 
GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: $vgpr8 = COPY [[COPY13]] - ; GFX6: $vgpr9 = COPY [[COPY14]] - ; GFX6: $vgpr10 = COPY [[COPY15]] - ; GFX6: $vgpr11 = COPY [[COPY16]] - ; GFX6: $vgpr12 = COPY [[COPY17]] - ; GFX6: $vgpr13 = COPY [[COPY18]] - ; GFX6: $vgpr14 = COPY [[COPY19]] - ; GFX6: $vgpr15 = COPY [[COPY20]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: $vgpr8 = COPY [[COPY13]] - ; GFX7: $vgpr9 = COPY [[COPY14]] - ; GFX7: $vgpr10 = COPY [[COPY15]] - ; GFX7: $vgpr11 = COPY [[COPY16]] - ; GFX7: $vgpr12 = COPY [[COPY17]] - ; GFX7: $vgpr13 = COPY [[COPY18]] - ; GFX7: $vgpr14 = COPY [[COPY19]] - ; GFX7: $vgpr15 = COPY [[COPY20]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], 
%subreg.sub12_sub13_sub14_sub15 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec 
:: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: $vgpr8 = COPY [[COPY13]] - ; GFX8: $vgpr9 = COPY [[COPY14]] - ; GFX8: $vgpr10 = COPY [[COPY15]] - ; GFX8: $vgpr11 = COPY [[COPY16]] - ; GFX8: $vgpr12 = COPY [[COPY17]] - ; GFX8: $vgpr13 = COPY [[COPY18]] - ; GFX8: $vgpr14 = COPY [[COPY19]] - ; GFX8: $vgpr15 = COPY [[COPY20]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from 
unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val } @@ -1963,43 +2035,46 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2008,43 +2083,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX7: bb.1 (%ir-block.0): - ; 
GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float 
@llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2053,43 +2131,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2099,94 +2180,97 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2196,94 +2280,97 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY 
[[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE 
[[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2292,148 +2379,151 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: $vgpr8 = COPY [[COPY13]] - ; GFX6: $vgpr9 = COPY [[COPY14]] - ; GFX6: $vgpr10 = COPY [[COPY15]] - ; GFX6: $vgpr11 = COPY [[COPY16]] - ; GFX6: $vgpr12 = COPY [[COPY17]] - ; GFX6: $vgpr13 = COPY [[COPY18]] - ; GFX6: $vgpr14 = COPY [[COPY19]] - ; GFX6: $vgpr15 = COPY [[COPY20]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], 
%subreg.sub12_sub13_sub14_sub15 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 
0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: $vgpr8 = COPY [[COPY13]] - ; GFX7: $vgpr9 = COPY [[COPY14]] - ; GFX7: $vgpr10 = COPY [[COPY15]] - ; GFX7: $vgpr11 = COPY [[COPY16]] - ; GFX7: $vgpr12 = COPY [[COPY17]] - ; GFX7: $vgpr13 = COPY [[COPY18]] - ; GFX7: $vgpr14 = COPY [[COPY19]] - ; GFX7: $vgpr15 = COPY [[COPY20]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable 
invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: $vgpr8 = COPY [[COPY13]] - ; GFX8: $vgpr9 = COPY [[COPY14]] - ; GFX8: $vgpr10 = COPY [[COPY15]] - ; GFX8: $vgpr11 = COPY [[COPY16]] - ; GFX8: $vgpr12 = COPY [[COPY17]] - ; GFX8: $vgpr13 = COPY [[COPY18]] - ; GFX8: $vgpr14 = COPY [[COPY19]] - ; GFX8: $vgpr15 = COPY [[COPY20]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -2442,148 +2532,151 @@ define amdgpu_ps <16 x float> 
@s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: $vgpr8 = COPY [[COPY13]] - ; GFX6: $vgpr9 = COPY [[COPY14]] - ; GFX6: $vgpr10 = COPY [[COPY15]] - ; GFX6: $vgpr11 = COPY [[COPY16]] - ; GFX6: $vgpr12 = COPY [[COPY17]] - ; GFX6: $vgpr13 = COPY [[COPY18]] - ; GFX6: 
$vgpr14 = COPY [[COPY19]] - ; GFX6: $vgpr15 = COPY [[COPY20]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = 
COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - 
; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: $vgpr8 = COPY [[COPY13]] - ; GFX7: $vgpr9 = COPY [[COPY14]] - ; GFX7: $vgpr10 = COPY [[COPY15]] - ; GFX7: $vgpr11 = COPY [[COPY16]] - ; GFX7: $vgpr12 = COPY [[COPY17]] - ; GFX7: $vgpr13 = COPY [[COPY18]] - ; GFX7: $vgpr14 = COPY [[COPY19]] - ; GFX7: $vgpr15 = COPY [[COPY20]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: 
[[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: $vgpr8 = COPY [[COPY13]] - ; GFX8: $vgpr9 = COPY [[COPY14]] - ; GFX8: $vgpr10 = COPY [[COPY15]] - ; GFX8: $vgpr11 = COPY [[COPY16]] - ; GFX8: $vgpr12 = COPY [[COPY17]] - ; GFX8: $vgpr13 = COPY [[COPY18]] - ; GFX8: $vgpr14 = COPY [[COPY19]] - ; GFX8: $vgpr15 = COPY [[COPY20]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -2593,109 +2686,124 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; 
GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = 
S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -2704,103 +2812,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - 
; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY 
[[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2810,115 +2933,130 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: 
[[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - 
; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2928,103 +3066,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: 
[[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; 
GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: 
[[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val } @@ -3033,107 +3186,122 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; 
GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; 
GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val } @@ -3143,154 +3311,169 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), 
align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], 
implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; 
GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: 
$exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3301,166 +3484,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: $vgpr3 = COPY [[COPY11]] - ; GFX6: $vgpr4 = COPY [[COPY12]] - ; GFX6: $vgpr5 = COPY [[COPY13]] - ; GFX6: $vgpr6 = COPY [[COPY14]] - ; GFX6: $vgpr7 = COPY [[COPY15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; 
GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, 
implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: $vgpr3 = COPY [[COPY11]] - ; GFX7: $vgpr4 = COPY [[COPY12]] - ; GFX7: $vgpr5 = COPY [[COPY13]] - ; GFX7: $vgpr6 = COPY [[COPY14]] - ; GFX7: $vgpr7 = COPY [[COPY15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: $vgpr3 = COPY [[COPY11]] - ; GFX8: $vgpr4 = COPY [[COPY12]] - ; GFX8: $vgpr5 = COPY [[COPY13]] - ; GFX8: $vgpr6 = COPY [[COPY14]] - ; GFX8: $vgpr7 = COPY [[COPY15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: 
$vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3469,166 +3667,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], 
[[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: $vgpr3 = COPY [[COPY11]] - ; GFX6: $vgpr4 = COPY [[COPY12]] - ; GFX6: $vgpr5 = COPY [[COPY13]] - ; GFX6: $vgpr6 = COPY [[COPY14]] - ; GFX6: $vgpr7 = COPY [[COPY15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - 
; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: $vgpr3 = COPY [[COPY11]] - ; GFX7: $vgpr4 = COPY [[COPY12]] - ; GFX7: $vgpr5 = COPY [[COPY13]] - ; GFX7: $vgpr6 = COPY [[COPY14]] - ; GFX7: $vgpr7 = COPY [[COPY15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: $vgpr3 = COPY [[COPY11]] - ; GFX8: $vgpr4 = COPY [[COPY12]] - ; GFX8: $vgpr5 = COPY [[COPY13]] - ; GFX8: $vgpr6 = COPY [[COPY14]] - ; GFX8: $vgpr7 = COPY [[COPY15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3637,157 +3850,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY 
[[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], 
%subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 
= BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; 
GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3796,157 +4024,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 
16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; 
GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY 
[[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; 
GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], 
[[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3955,157 +4198,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 
(%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; 
GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: 
bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -4114,154 +4372,169 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY6]] - ; GFX6: $vgpr1 = COPY [[COPY7]] - ; GFX6: $vgpr2 = COPY [[COPY8]] - ; GFX6: $vgpr3 = COPY [[COPY9]] - ; GFX6: $vgpr4 = COPY [[COPY10]] - ; GFX6: $vgpr5 = COPY [[COPY11]] - ; GFX6: $vgpr6 = 
COPY [[COPY12]] - ; GFX6: $vgpr7 = COPY [[COPY13]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 
4064, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY6]] - ; GFX7: $vgpr1 = COPY [[COPY7]] - ; GFX7: $vgpr2 = COPY [[COPY8]] - ; GFX7: $vgpr3 = COPY [[COPY9]] - ; GFX7: $vgpr4 = COPY [[COPY10]] - ; GFX7: $vgpr5 = COPY [[COPY11]] - ; GFX7: $vgpr6 = COPY [[COPY12]] - ; GFX7: $vgpr7 = COPY [[COPY13]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY6]] - ; GFX8: $vgpr1 = COPY [[COPY7]] - ; GFX8: $vgpr2 = COPY [[COPY8]] - ; GFX8: $vgpr3 = COPY [[COPY9]] - ; GFX8: $vgpr4 = COPY [[COPY10]] - ; GFX8: $vgpr5 = COPY [[COPY11]] - ; GFX8: $vgpr6 = COPY [[COPY12]] - ; GFX8: $vgpr7 = COPY [[COPY13]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; 
GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr4 = COPY 
[[COPY10]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val } @@ -4269,43 +4542,46 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -4314,43 +4590,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 
0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], 
[[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -4359,52 +4638,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 
(s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; 
GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4414,52 +4696,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: 
$vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4470,49 +4755,52 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: 
s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4522,52 +4810,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index c99da17fe964..98bc9815f001 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -530,20 +530,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_v3i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 ret <3 x i32> %load @@ -552,20 +552,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_i96_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_i96_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load i96, i96 addrspace(4)* %ptr, align 8 ret i96 %load @@ -574,20 +574,20 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { define 
amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_v3i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8 ret <3 x i32> %load @@ -596,20 +596,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_v6i16_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_v6i16_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8 %cast = bitcast <6 x i16> %load to <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 925c7bd4a17e..c11a19481949 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -6,32 +6,34 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GREEDY-LABEL: name: s_buffer_load_i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val } @@ -39,40 +41,42 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: 
[[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val } @@ -80,56 +84,48 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v3i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: 
[[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK: $sgpr2 = COPY [[INT2]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GREEDY: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; GREEDY: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 
- ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val } @@ -137,76 +133,78 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: 
[[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK: $sgpr2 = COPY [[INT2]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK: $sgpr3 = COPY [[INT3]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK: $sgpr4 = COPY [[INT4]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK: $sgpr5 = COPY [[INT5]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK: $sgpr6 = COPY [[INT6]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK: $sgpr7 = COPY [[INT7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), 
[[COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY: $sgpr7 = COPY [[INT7]](s32) - ; 
GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val } @@ -214,124 +212,126 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK: $sgpr2 = COPY [[INT2]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK: $sgpr3 = COPY [[INT3]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK: $sgpr4 = COPY [[INT4]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK: $sgpr5 = COPY [[INT5]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK: $sgpr6 = COPY [[INT6]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK: $sgpr7 = COPY [[INT7]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; CHECK: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; CHECK: $sgpr8 = COPY [[INT8]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; CHECK: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; CHECK: $sgpr9 = COPY [[INT9]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; CHECK: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; CHECK: $sgpr10 = COPY [[INT10]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; CHECK: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; CHECK: $sgpr11 = COPY [[INT11]](s32) - ; 
CHECK: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; CHECK: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; CHECK: $sgpr12 = COPY [[INT12]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; CHECK: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; CHECK: $sgpr13 = COPY [[INT13]](s32) - ; CHECK: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; CHECK: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; CHECK: $sgpr14 = COPY [[INT14]](s32) - ; CHECK: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; CHECK: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; CHECK: $sgpr15 = COPY [[INT15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) 
= G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) + ; CHECK-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) + ; CHECK-NEXT: $sgpr8 = COPY [[INT8]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) + ; CHECK-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) + ; CHECK-NEXT: $sgpr9 = COPY [[INT9]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) + ; CHECK-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) + ; CHECK-NEXT: $sgpr10 = COPY [[INT10]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) + ; CHECK-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) + ; CHECK-NEXT: $sgpr11 = COPY [[INT11]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) + ; CHECK-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) + ; CHECK-NEXT: $sgpr12 = COPY [[INT12]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) + ; CHECK-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[INT13]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) + ; CHECK-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[INT14]](s32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) + ; CHECK-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), 
[[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY: $sgpr7 = COPY [[INT7]](s32) - ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; GREEDY: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; GREEDY: $sgpr8 = COPY [[INT8]](s32) - ; GREEDY: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; GREEDY: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; GREEDY: $sgpr9 = COPY [[INT9]](s32) - ; GREEDY: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; GREEDY: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; GREEDY: $sgpr10 = COPY [[INT10]](s32) - ; GREEDY: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; GREEDY: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; GREEDY: $sgpr11 = COPY [[INT11]](s32) - ; GREEDY: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; GREEDY: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; GREEDY: $sgpr12 = COPY [[INT12]](s32) - ; GREEDY: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; GREEDY: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; GREEDY: $sgpr13 = COPY [[INT13]](s32) - ; GREEDY: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; GREEDY: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; GREEDY: $sgpr14 = COPY [[INT14]](s32) - ; GREEDY: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; GREEDY: 
[[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; GREEDY: $sgpr15 = COPY [[INT15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) + ; GREEDY-NEXT: 
[[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) + ; GREEDY-NEXT: $sgpr8 = COPY [[INT8]](s32) + ; GREEDY-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) + ; GREEDY-NEXT: $sgpr9 = COPY [[INT9]](s32) + ; GREEDY-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) + ; GREEDY-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) + ; GREEDY-NEXT: $sgpr10 = COPY [[INT10]](s32) + ; GREEDY-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) + ; GREEDY-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) + ; GREEDY-NEXT: $sgpr11 = COPY [[INT11]](s32) + ; GREEDY-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) + ; GREEDY-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) + ; GREEDY-NEXT: $sgpr12 = COPY [[INT12]](s32) + ; GREEDY-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) + ; GREEDY-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) + ; GREEDY-NEXT: $sgpr13 = COPY [[INT13]](s32) + ; GREEDY-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) + ; GREEDY-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) + ; GREEDY-NEXT: $sgpr14 = COPY [[INT14]](s32) + ; GREEDY-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) + ; GREEDY-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val } @@ -340,32 +340,34 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; 
CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -373,36 +375,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val } @@ -410,52 +414,40 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; GREEDY: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; GREEDY: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val } @@ -463,40 +455,42 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; 
CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GREEDY-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val } @@ -504,52 +498,54 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x 
s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val } @@ -557,72 +553,74 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 
48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY: $vgpr15 = COPY [[UV15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), 
[[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val } @@ -630,36 +628,38 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_i96_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; CHECK: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; 
CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) + ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; GREEDY: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) + ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) store i96 %val, i96 addrspace(1)* undef ret void @@ -669,46 +669,48 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_i256_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = 
G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) store i256 %val, i256 addrspace(1)* undef ret void @@ -718,62 +720,64 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_i512_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: 
[[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), 
align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), 
[[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store 
(s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) store i512 %val, i512 addrspace(1)* undef ret void @@ -783,46 +787,48 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 
16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: 
[[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <16 x i16> %val, <16 x i16> addrspace(1)* undef ret void @@ -832,62 +838,64 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<8 x 
s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD 
[[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <32 x i16> %val, <32 x i16> addrspace(1)* undef ret void @@ -897,46 +905,48 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i64> %val, <4 x i64> addrspace(1)* undef ret void @@ -946,62 +956,64 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg 
%rsrc, i3 define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant 
load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i64> %val, <8 x i64> addrspace(1)* undef ret void @@ -1011,46 +1023,48 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x p1>), 
[[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = 
G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef ret void @@ -1060,62 +1074,64 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), 
[[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; 
CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 
addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + 
; GREEDY-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef ret void @@ -1124,38 +1140,40 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1164,38 +1182,40 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1204,36 +1224,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % define amdgpu_ps float 
@s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1243,58 +1265,60 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), 
[[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -1304,56 +1328,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY 
[[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), 
align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -1362,78 +1388,80 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), 
[[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: 
$vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY: $vgpr15 = COPY [[UV15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: 
$vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -1442,76 +1470,78 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; 
CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY: $vgpr9 = COPY 
[[UV9]](s32) - ; GREEDY: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY: $vgpr15 = COPY [[UV15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY-NEXT: 
$vgpr13 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -1521,78 +1551,88 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], 
implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GREEDY: bb.1 
(%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = 
COPY [[COPY4]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -1601,80 +1641,90 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) 
= COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 - ; GREEDY: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + 
; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1684,82 +1734,92 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: 
[[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], 
implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1769,78 +1829,88 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x 
s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY 
[[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val } @@ -1849,78 +1919,88 @@ define amdgpu_ps float 
@s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - 
; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val } @@ -1930,100 +2010,110 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; 
CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; CHECK-NEXT: 
[[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; 
CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS 
[[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; 
GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2034,102 +2124,112 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: 
s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), 
[[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), 
[[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2138,102 +2238,112 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: 
[[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x 
s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2242,100 +2352,110 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, 
%bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY 
[[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; 
GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2344,100 +2464,110 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), 
[[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD 
[[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), 
[[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2446,100 +2576,110 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term 
$exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY 
[[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2548,98 +2688,108 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; CHECK: 
[[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call 
<8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val } @@ -2647,36 +2797,38 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG 
implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -2685,36 +2837,38 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY 
[[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -2723,44 +2877,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; 
CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2770,44 +2926,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; 
CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2818,40 +2976,42 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2861,42 +3021,44 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: 
[[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 6ab65dbd4821..d8a19824fde9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -115,12 +115,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.global.not.uniform.v8i32) ... @@ -135,12 +135,12 @@ body: | ; CHECK-LABEL: name: load_global_v4i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.global.not.uniform.v4i64) ... 
@@ -154,18 +154,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.global.not.uniform.v16i32) ... 
@@ -179,18 +179,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.global.not.uniform.v8i64) ... @@ -204,7 +204,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s32>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load (<8 x s32>), addrspace 1) ... 
@@ -218,7 +218,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v4i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x s64>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x s64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load (<4 x s64>), addrspace 1) ... @@ -232,7 +232,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x s32>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x s32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load (<16 x s32>), addrspace 1) ... @@ -246,7 +246,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s64>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load (<8 x s64>), addrspace 1) ... @@ -260,12 +260,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.constant.not.uniform.v8i32) ... 
@@ -279,12 +279,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i256_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) - ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s256) = G_LOAD %0 :: (load (s256) from %ir.constant.not.uniform) ... @@ -299,12 +299,12 @@ body: | ; CHECK-LABEL: name: load_constant_v16i16_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>) from %ir.constant.not.uniform) ... 
@@ -318,12 +318,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.constant.not.uniform.v4i64) ... @@ -337,18 +337,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: 
[[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.constant.not.uniform.v16i32) ... @@ -362,18 +362,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) 
%0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.constant.not.uniform.v8i64) ... @@ -387,7 +387,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) ... @@ -401,7 +401,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i16_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>), addrspace 4) ... @@ -415,7 +415,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), addrspace 4) ... @@ -429,7 +429,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4) ... @@ -443,7 +443,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load (<8 x s64>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load (<8 x s64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), addrspace 4) ... 
@@ -457,8 +457,8 @@ body: | ; CHECK-LABEL: name: load_local_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3) @@ -472,8 +472,8 @@ body: | ; CHECK-LABEL: name: load_region_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5) @@ -488,8 +488,8 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -504,8 +504,8 @@ body: | ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -520,8 +520,8 @@ body: | ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 4, align 2) ... @@ -536,8 +536,8 @@ body: | ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2) ... 
@@ -551,7 +551,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 4) ... @@ -566,8 +566,8 @@ body: | ; CHECK-LABEL: name: load_constant_i32_uniform_align2 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2) ... @@ -582,8 +582,8 @@ body: | ; CHECK-LABEL: name: load_constant_i32_uniform_align1 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1) ... @@ -598,8 +598,8 @@ body: | ; CHECK-LABEL: name: load_private_uniform_sgpr_i32 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) %0:_(p5) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5, align 4) ... @@ -615,12 +615,13 @@ body: | ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash ; CHECK: liveins: $vgpr0_vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) ... 
@@ -633,21 +634,24 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load (<4 x s32>), align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) - ; CHECK: G_BR %bb.1 + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load (<4 x s32>), align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) + ; CHECK-NEXT: G_BR %bb.1 bb.0: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 @@ -672,14 +676,13 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), align 4, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = 
G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -694,14 +697,13 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -716,9 +718,10 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -733,14 +736,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), align 4, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT 
[[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -755,14 +758,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -777,9 +780,10 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0 - ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD 
[[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16), [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16), [[UV6:%[0-9]+]]:sgpr(s16), [[UV7:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -794,14 +798,13 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), align 4, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -816,14 +819,13 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), 
[[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -838,9 +840,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0 - ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s96) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[LOAD]](s128) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16) S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir index 163a83aac29b..bf150a54f434 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -30,14 +30,13 @@ body: | ; SI-LABEL: name: split_smrd_load_range ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) - ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) + ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) + ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !0) $sgpr0_sgpr1_sgpr2 = COPY %1 @@ -53,14 +52,13 @@ body: | ; SI-LABEL: name: split_smrd_load_tbaa ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), !tbaa !2, addrspace 4) - ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) - ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], 
[[LOAD1]](s32), 64 - ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), !tbaa !2, addrspace 4) + ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) + ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1) $sgpr0_sgpr1_sgpr2 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 740fc68c4b3d..65c62c47a823 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1051,10 +1051,11 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s4, 0xf000 -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s5, 0xf000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1066,10 +1067,10 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1098,10 +1099,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1129,55 +1129,56 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; 
CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: v_mov_b32_e32 v8, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: v_mov_b32_e32 v7, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc @@ -1490,41 +1491,169 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_movk_i32 s6, 0xf000 -; CGP-NEXT: s_movk_i32 s7, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_movk_i32 s6, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s7, 0xf000 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; 
CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; 
CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v11, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1535,27 +1664,27 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1566,195 +1695,69 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; 
CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v11, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v12, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s6 +; CGP-NEXT: 
v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result @@ -1764,10 +1767,11 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1779,10 +1783,10 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1811,10 +1815,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1842,55 +1845,56 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc 
; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: v_mov_b32_e32 v8, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: v_mov_b32_e32 v7, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc @@ -2203,41 +2207,169 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s7, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_mov_b32 s6, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: 
v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v11, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: 
v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2248,27 +2380,27 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2279,195 +2411,69 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; 
CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v11, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v12, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: 
v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 
s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 5323967b4fc4..e75db654647b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1031,10 +1031,11 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s4, 0xf000 -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s5, 0xf000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1046,10 +1047,10 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, 
v8 @@ -1078,10 +1079,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1109,49 +1109,50 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, 
vcc @@ -1466,41 +1467,166 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_srem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_movk_i32 s6, 0xf000 -; CGP-NEXT: s_movk_i32 s7, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_movk_i32 s6, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s7, 0xf000 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; 
CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v8, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: 
v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1511,27 +1637,27 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1542,191 +1668,68 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 
v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 -; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: 
v_mov_b32_e32 v7, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_mov_b32_e32 v8, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v9, s4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result @@ -1736,10 +1739,11 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_srem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1751,10 +1755,10 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1783,10 +1787,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: 
v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1814,49 +1817,50 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc @@ -2171,41 +2175,166 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_srem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s7, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: 
v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_mov_b32 s6, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: 
v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v8, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: 
v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2216,27 +2345,27 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2247,191 +2376,68 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: 
v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 -; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; 
CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_mov_b32_e32 v8, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; 
CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v9, s4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v10, s4
 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7
+; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5
 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
 ; CGP-NEXT: s_setpc_b64 s[30:31]
 %result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
 ret <2 x i64> %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index ce21d2546b8a..fdf5ba53330c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -968,129 +968,130 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_urem_i64_oddk_denom:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_mov_b32 s4, 0xffed2705
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000
+; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
+; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
 ; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
-; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT: v_mov_b32_e32 v3, s5
-; CHECK-NEXT: v_mov_b32_e32 v4, s7
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT: v_trunc_f32_e32 v5, v5
-; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v5
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6
-; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s4
+; CHECK-NEXT: v_mov_b32_e32 v5, s6
+; CHECK-NEXT: v_mov_b32_e32 v6, s7
+; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3
+; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; CHECK-NEXT: v_mul_f32_e32 v4, 
0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, s5, v3 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_mul_lo_u32 v10, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v4, v7 +; CHECK-NEXT: v_mul_hi_u32 v13, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v13 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v11 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v8, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v9, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v10, s5, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v4, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v13, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s4, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v6 -; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v7 -; CHECK-NEXT: v_mul_lo_u32 v9, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_cndmask_b32_e64 
v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 
s[4:5], v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i64 %num, 1235195 ret i64 %result @@ -1357,63 +1358,64 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_urem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: s_mov_b32 s8, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s8, 0x12d8fb ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: s_bfe_i32 s5, -1, 0x10000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; CGP-NEXT: s_bfe_i32 s7, -1, 0x10000 ; CGP-NEXT: s_bfe_i32 s9, -1, 0x10000 -; CGP-NEXT: v_mov_b32_e32 v6, v4 -; CGP-NEXT: v_mov_b32_e32 v7, s4 -; CGP-NEXT: v_mov_b32_e32 v8, s5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_mov_b32_e32 v9, s7 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s8 +; CGP-NEXT: v_mov_b32_e32 v8, s4 +; CGP-NEXT: v_mov_b32_e32 v9, s5 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v10 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 ; CGP-NEXT: v_trunc_f32_e32 v10, v10 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v10 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 ; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 -; CGP-NEXT: v_mul_lo_u32 v13, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v14, -1, v4 -; CGP-NEXT: v_mul_hi_u32 v15, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v16, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v17, -1, v5 -; CGP-NEXT: v_mul_hi_u32 v18, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v17, -1, v6 +; CGP-NEXT: v_mul_hi_u32 v18, s6, v6 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; CGP-NEXT: v_mul_lo_u32 v14, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v19, v4, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v16 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v16 +; CGP-NEXT: v_mul_hi_u32 v15, v6, v16 ; CGP-NEXT: v_mul_hi_u32 v16, v10, v16 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_mul_lo_u32 v18, v5, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v6, v12 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_mul_lo_u32 v15, v4, v11 -; CGP-NEXT: v_mul_lo_u32 v17, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v19 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v12 @@ -1424,7 +1426,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v17, v14 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v19, v16 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc @@ -1439,38 +1441,38 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v13, -1, v4 -; CGP-NEXT: v_mul_hi_u32 v14, s6, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v11, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v12, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v15, -1, v5 -; CGP-NEXT: v_mul_hi_u32 v16, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v17, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v18, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v15, -1, v6 +; CGP-NEXT: v_mul_hi_u32 v16, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v17, s6, v7 +; CGP-NEXT: v_mul_lo_u32 
v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; CGP-NEXT: v_mul_lo_u32 v17, s6, v10 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v12 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v14, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_mul_lo_u32 v16, v5, v15 +; CGP-NEXT: v_mul_lo_u32 v16, v6, v15 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_lo_u32 v14, v4, v13 -; CGP-NEXT: v_mul_lo_u32 v16, v6, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v16, v7, v13 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v13 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v15 @@ -1481,125 +1483,126 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v17, v5, v15 +; CGP-NEXT: v_mul_hi_u32 v17, v6, v15 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; CGP-NEXT: v_mov_b32_e32 v19, s9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 +; CGP-NEXT: v_mov_b32_e32 v19, s7 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; CGP-NEXT: v_mov_b32_e32 v18, s9 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v16 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v3, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v2, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v1, v5 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v2, v6 -; CGP-NEXT: v_mul_lo_u32 v16, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v17, v2, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v12, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v18, v0, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v10 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v10 -; 
CGP-NEXT: v_mul_hi_u32 v10, v1, v10 -; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v16, v4 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_mul_lo_u32 v11, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v10 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v17 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; CGP-NEXT: v_mul_hi_u32 v11, v2, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v3, v10 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v6 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v13, s8, v4 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s8, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 ; CGP-NEXT: v_mul_lo_u32 v14, s8, v5 -; CGP-NEXT: v_mul_lo_u32 v16, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v15, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, s8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_mul_lo_u32 v6, s8, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_mul_lo_u32 v12, s8, v6 +; CGP-NEXT: v_mul_lo_u32 v16, 0, v6 +; CGP-NEXT: v_mul_hi_u32 v6, s8, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v7, s8, v7 ; CGP-NEXT: v_mul_lo_u32 v10, s8, v10 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v5, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 -; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] -; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[6:7] -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12 +; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v3, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2 -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0 +; CGP-NEXT: v_cndmask_b32_e32 v6, v19, v6, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7 -; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v2, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; CGP-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11 -; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4 +; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, v11, v12, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5] -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: 
v_cndmask_b32_e32 v0, v0, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
+; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[4:5]
 ; CGP-NEXT: s_setpc_b64 s[30:31]
 %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
 ret <2 x i64> %result
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
new file mode 100755
index 000000000000..d7fa098d5eb8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
@@ -0,0 +1,215 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX9 %s
+
+; GCN-LABEL: name: uniform_vec_0_i16
+; GCN: S_LSHL_B32
+define amdgpu_kernel void @uniform_vec_0_i16(i32 addrspace(1)* %out, i16 %a) {
+ %tmp = insertelement <2 x i16> undef, i16 0, i32 0
+ %vec = insertelement <2 x i16> %tmp, i16 %a, i32 1
+ %val = bitcast <2 x i16> %vec to i32
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: divergent_vec_0_i16
+; GCN: V_LSHLREV_B32_e64
+define i32 @divergent_vec_0_i16(i16 %a) {
+ %tmp = insertelement <2 x i16> undef, i16 0, i32 0
+ %vec = insertelement <2 x i16> %tmp, i16 %a, i32 1
+ %val = bitcast <2 x i16> %vec to i32
+ ret i32 %val
+}
+
+; GCN-LABEL: name: uniform_vec_i16_0
+; GCN: S_AND_B32
+define amdgpu_kernel void @uniform_vec_i16_0(i32 addrspace(1)* %out, i16 %a) {
+ %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
+ %vec = insertelement <2 x i16> %tmp, i16 0, i32 1
+ %val = bitcast <2 x i16> %vec to i32
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: divergent_vec_i16_0
+; GCN: V_AND_B32_e64
+define i32 @divergent_vec_i16_0(i16 %a) {
+ %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
+ %vec = insertelement <2 x i16> %tmp, i16 0, i32 1
+ %val = bitcast <2 x i16> %vec to i32
+ ret i32 %val
+}
+
+; GCN-LABEL: name: uniform_vec_f16_0
+; GCN: S_AND_B32
+define amdgpu_kernel void @uniform_vec_f16_0(float addrspace(1)* %out, half %a) {
+ %tmp = insertelement <2 x half> undef, half %a, i32 0
+ %vec = insertelement <2 x half> %tmp, half 0.0, i32 1
+ %val = bitcast <2 x half> %vec to float
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: divergent_vec_f16_0
+; GCN: V_CVT_F16_F32_e64 0, %0
+; GCN: COPY %1
+
+; GFX9-LABEL: name: divergent_vec_f16_0
+; GFX9: V_AND_B32_e64
+define float @divergent_vec_f16_0(half %a) {
+ %tmp = insertelement <2 x half> undef, half %a, i32 0
+ %vec = insertelement <2 x half> %tmp, half 0.0, i32 1
+ %val = bitcast <2 x half> %vec to float
+ ret float %val
+}
+
+; GCN-LABEL: name: uniform_vec_i16_LL
+; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
+; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
+; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
+; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
+; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]]
+
+; GFX9-LABEL: name: uniform_vec_i16_LL
+; GFX9: S_PACK_LL_B32_B16
+define amdgpu_kernel void @uniform_vec_i16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) {
+ %val0 = load volatile i32, i32 addrspace(4)* %in0
+ %val1 = load volatile i32, i32 addrspace(4)* %in1
+ %lo = trunc i32 %val0 to i16
+ %hi = trunc i32 %val1 to i16
+ %vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
+ %vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1 + %vec.i32 = bitcast <2 x i16> %vec.1 to i32 + call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_LL +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]], %1, implicit $exec +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %0, killed %[[IMM]], implicit $exec +; GCN: V_OR_B32_e64 killed %[[AND]], killed %[[SHL]], implicit $exec + +; GFX9-LABEL: name: divergent_vec_i16_LL +; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535 +; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]] +; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]] +define i32 @divergent_vec_i16_LL(i16 %a, i16 %b) { + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %b, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_i16_LH +; GCN-DAG: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN-DAG: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN-DAG: %[[NEG:[0-9]+]]:sreg_32 = S_MOV_B32 -65536 +; GCN-DAG: %[[ANDN:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[NEG]] +; GCN: S_OR_B32 killed %[[AND]], killed %[[ANDN]] + +; GFX9-LABEL: name: uniform_vec_i16_LH +; GFX9: S_PACK_LH_B32_B16 +define amdgpu_kernel void @uniform_vec_i16_LH(i32 addrspace(1)* %out, i16 %a, i32 %b) { + %shift = lshr i32 %b, 16 + %tr = trunc i32 %shift to i16 + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1 + %val = bitcast <2 x i16> %vec to i32 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_LH +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: V_BFI_B32_e64 killed %[[IMM]] +define i32 @divergent_vec_i16_LH(i16 %a, i32 %b) { + %shift = lshr i32 %b, 16 + %tr = trunc i32 %shift to i16 + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_i16_HH +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHR:[0-9]+]]:sreg_32 = S_LSHR_B32 killed %{{[0-9]+}}, killed %[[SHIFT]] +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536 +; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN: S_OR_B32 killed %[[SHR]], killed %[[AND]] + +; GFX9-LABEL: name: uniform_vec_i16_HH +; GFX9: S_PACK_HH_B32_B16 +define amdgpu_kernel void @uniform_vec_i16_HH(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %shift_a = lshr i32 %a, 16 + %tr_a = trunc i32 %shift_a to i16 + %shift_b = lshr i32 %b, 16 + %tr_b = trunc i32 %shift_b to i16 + %tmp = insertelement <2 x i16> undef, i16 %tr_a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr_b, i32 1 + %val = bitcast <2 x i16> %vec to i32 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_HH +; GCN: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 killed %{{[0-9]+}}, %0, implicit $exec +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536 +; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %1, killed %[[IMM]], implicit $exec +; GCN: V_OR_B32_e64 killed %[[SHR]], killed %[[AND]], implicit $exec + +; GFX9-LABEL: name: divergent_vec_i16_HH +; GFX9: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %0 +; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 -65536, implicit $exec +; GFX9: V_AND_OR_B32_e64 %1, killed %[[IMM]], killed %[[SHR]] +define i32 @divergent_vec_i16_HH(i32 %a, i32 %b) { + %shift_a = lshr i32 %a, 16 + %tr_a = trunc i32 %shift_a to i16 + %shift_b = lshr i32 %b, 16 + %tr_b = trunc i32 %shift_b to i16 + %tmp = insertelement <2 x i16> undef, i16 %tr_a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr_b, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_f16_LL +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]] +; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]] + +; GFX9-LABEL: name: uniform_vec_f16_LL +; GFX9: S_PACK_LL_B32_B16 +define amdgpu_kernel void @uniform_vec_f16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) { + %val0 = load volatile i32, i32 addrspace(4)* %in0 + %val1 = load volatile i32, i32 addrspace(4)* %in1 + %lo.i = trunc i32 %val0 to i16 + %hi.i = trunc i32 %val1 to i16 + %lo = bitcast i16 %lo.i to half + %hi = bitcast i16 %hi.i to half + %vec.0 = insertelement <2 x half> undef, half %lo, i32 0 + %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1 + %vec.i32 = bitcast <2 x half> %vec.1 to i32 + + call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0 + ret void +} + +; GCN-LABEL: name: divergent_vec_f16_LL +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]] +; GCN: V_OR_B32_e64 killed %{{[0-9]+}}, killed %[[SHL]], implicit $exec + +; GFX9-LABEL: name: divergent_vec_f16_LL +; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535 +; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]] +; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]] +define float @divergent_vec_f16_LL(half %a, half %b) { + %tmp = insertelement <2 x half> undef, half %a, i32 0 + %vec = insertelement <2 x half> %tmp, half %b, i32 1 + %val = bitcast <2 x half> %vec to float + ret float %val +} diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll new file mode 100644 index 000000000000..4429ee6f3ba6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -0,0 +1,59 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: name: uniform_trunc_i16_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { + %setcc = icmp slt i16 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i16_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { + %setcc = icmp slt i16 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + +; GCN-LABEL: name: uniform_trunc_i32_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) { + %setcc = icmp slt i32 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i32_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i32_to_i1(i1 addrspace(1)* %out, 
i32 %x, i1 %z) { + %setcc = icmp slt i32 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + +; GCN-LABEL: name: uniform_trunc_i64_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { + %setcc = icmp slt i64 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i64_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { + %setcc = icmp slt i64 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll index e04eec3e3ab7..b27943195857 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll @@ -428,30 +428,31 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre ; ; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX7: ; %bb.0: ; %main_body -; G_GFX7-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd +; G_GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd ; G_GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xf +; G_GFX7-NEXT: s_load_dword s0, s[0:1], 0xf ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX7-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX7-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX7-NEXT: s_waitcnt vmcnt(0) ; G_GFX7-NEXT: ds_write_b32 v1, v0 ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX10: ; %bb.0: ; %main_body -; G_GFX10-NEXT: s_clause 0x2 -; G_GFX10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x3c ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX10-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX10-NEXT: s_load_dword s0, s[0:1], 0x3c ; G_GFX10-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX10-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX10-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX10-NEXT: s_waitcnt vmcnt(0) ; G_GFX10-NEXT: ds_write_b32 v1, v0 ; G_GFX10-NEXT: s_endpgm @@ -459,14 +460,14 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre ; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX1030: ; %bb.0: ; %main_body ; G_GFX1030-NEXT: s_clause 0x2 -; G_GFX1030-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; G_GFX1030-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; G_GFX1030-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; G_GFX1030-NEXT: s_load_dword s2, s[0:1], 0x3c +; G_GFX1030-NEXT: s_load_dword s0, s[0:1], 0x3c ; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX1030-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX1030-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX1030-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX1030-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX1030-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX1030-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX1030-NEXT: v_mov_b32_e32 v1, s0 ; 
G_GFX1030-NEXT: s_waitcnt vmcnt(0) ; G_GFX1030-NEXT: ds_write_b32 v1, v0 ; G_GFX1030-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll index e73354d2d0b8..d3153d12641c 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -2362,46 +2362,46 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: global_load_ushort v3, v0, s[2:3] ; GFX9-NEXT: s_addc_u32 s9, s9, 0 ; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX9-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX9-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX9-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX9-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX9-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX9-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX9-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_and_b32_e32 v18, 15, v2 -; GFX9-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX9-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX9-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX9-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX9-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX9-NEXT: v_bfe_u32 v1, v1, 4, 4 -; GFX9-NEXT: v_bfe_u32 v12, v2, 24, 4 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 28, v2 -; GFX9-NEXT: v_bfe_u32 v14, v2, 16, 4 -; GFX9-NEXT: v_bfe_u32 v15, v2, 20, 4 -; GFX9-NEXT: v_bfe_u32 v16, v2, 8, 4 -; GFX9-NEXT: v_bfe_u32 v17, v2, 12, 4 -; GFX9-NEXT: v_bfe_u32 v2, v2, 4, 4 -; GFX9-NEXT: v_and_b32_e32 v11, v4, v11 -; GFX9-NEXT: v_and_b32_e32 v18, v4, v18 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v11 -; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v18 -; GFX9-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v5, v4, v5 -; GFX9-NEXT: v_and_b32_e32 v16, v4, v16 +; GFX9-NEXT: v_lshrrev_b32_e32 v12, 28, v2 +; GFX9-NEXT: v_bfe_u32 v13, v2, 24, 4 +; GFX9-NEXT: v_bfe_u32 v14, v2, 20, 4 +; GFX9-NEXT: v_bfe_u32 v15, v2, 16, 4 +; GFX9-NEXT: v_bfe_u32 v16, v2, 12, 4 +; GFX9-NEXT: v_bfe_u32 v17, v2, 8, 4 +; GFX9-NEXT: v_bfe_u32 v18, v2, 4, 4 +; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX9-NEXT: v_and_b32_e32 v1, v4, v1 +; GFX9-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX9-NEXT: v_lshl_or_b32 v1, v11, 16, v1 +; GFX9-NEXT: v_lshl_or_b32 v2, v18, 16, v2 +; GFX9-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX9-NEXT: v_and_b32_e32 v6, v4, v6 +; GFX9-NEXT: v_and_b32_e32 v17, v4, v17 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX9-NEXT: v_lshl_or_b32 v5, v6, 16, v5 -; GFX9-NEXT: v_lshl_or_b32 v6, v17, 16, v16 +; GFX9-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX9-NEXT: v_lshl_or_b32 v5, v5, 16, v6 +; GFX9-NEXT: v_lshl_or_b32 v6, v16, 16, v17 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX9-NEXT: v_and_b32_e32 v14, v4, v14 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v15, v4, v15 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v13 ; GFX9-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX9-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX9-NEXT: v_lshl_or_b32 v8, v15, 16, v14 -; GFX9-NEXT: v_lshl_or_b32 v4, v13, 16, v4 +; GFX9-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX9-NEXT: v_lshl_or_b32 v8, v14, 16, v15 +; GFX9-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX9-NEXT: 
v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -2445,46 +2445,46 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: global_load_ushort v3, v0, s[2:3] ; GFX9-DL-NEXT: s_addc_u32 s9, s9, 0 ; GFX9-DL-NEXT: s_waitcnt vmcnt(2) -; GFX9-DL-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX9-DL-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX9-DL-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX9-DL-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX9-DL-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX9-DL-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX9-DL-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX9-DL-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX9-DL-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) -; GFX9-DL-NEXT: v_and_b32_e32 v18, 15, v2 -; GFX9-DL-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX9-DL-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX9-DL-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX9-DL-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX9-DL-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX9-DL-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX9-DL-NEXT: v_bfe_u32 v1, v1, 4, 4 -; GFX9-DL-NEXT: v_bfe_u32 v12, v2, 24, 4 -; GFX9-DL-NEXT: v_lshrrev_b32_e32 v13, 28, v2 -; GFX9-DL-NEXT: v_bfe_u32 v14, v2, 16, 4 -; GFX9-DL-NEXT: v_bfe_u32 v15, v2, 20, 4 -; GFX9-DL-NEXT: v_bfe_u32 v16, v2, 8, 4 -; GFX9-DL-NEXT: v_bfe_u32 v17, v2, 12, 4 -; GFX9-DL-NEXT: v_bfe_u32 v2, v2, 4, 4 -; GFX9-DL-NEXT: v_and_b32_e32 v11, v4, v11 -; GFX9-DL-NEXT: v_and_b32_e32 v18, v4, v18 -; GFX9-DL-NEXT: v_lshl_or_b32 v1, v1, 16, v11 -; GFX9-DL-NEXT: v_lshl_or_b32 v2, v2, 16, v18 -; GFX9-DL-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX9-DL-NEXT: v_and_b32_e32 v5, v4, v5 -; GFX9-DL-NEXT: v_and_b32_e32 v16, v4, v16 +; GFX9-DL-NEXT: v_lshrrev_b32_e32 v12, 28, v2 +; GFX9-DL-NEXT: v_bfe_u32 v13, v2, 24, 4 +; GFX9-DL-NEXT: v_bfe_u32 v14, v2, 20, 4 +; GFX9-DL-NEXT: v_bfe_u32 v15, v2, 16, 4 +; GFX9-DL-NEXT: v_bfe_u32 v16, v2, 12, 4 +; GFX9-DL-NEXT: v_bfe_u32 v17, v2, 8, 4 +; GFX9-DL-NEXT: v_bfe_u32 v18, v2, 4, 4 +; GFX9-DL-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX9-DL-NEXT: v_and_b32_e32 v1, v4, v1 +; GFX9-DL-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX9-DL-NEXT: v_lshl_or_b32 v1, v11, 16, v1 +; GFX9-DL-NEXT: v_lshl_or_b32 v2, v18, 16, v2 +; GFX9-DL-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX9-DL-NEXT: v_and_b32_e32 v6, v4, v6 +; GFX9-DL-NEXT: v_and_b32_e32 v17, v4, v17 ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-DL-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX9-DL-NEXT: v_lshl_or_b32 v5, v6, 16, v5 -; GFX9-DL-NEXT: v_lshl_or_b32 v6, v17, 16, v16 +; GFX9-DL-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX9-DL-NEXT: v_lshl_or_b32 v5, v5, 16, v6 +; GFX9-DL-NEXT: v_lshl_or_b32 v6, v16, 16, v17 ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-DL-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX9-DL-NEXT: v_and_b32_e32 v14, v4, v14 -; GFX9-DL-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX9-DL-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX9-DL-NEXT: v_and_b32_e32 v15, v4, v15 +; GFX9-DL-NEXT: v_and_b32_e32 v4, v4, v13 ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX9-DL-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX9-DL-NEXT: v_lshl_or_b32 v8, v15, 16, v14 -; GFX9-DL-NEXT: v_lshl_or_b32 v4, v13, 16, v4 +; GFX9-DL-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX9-DL-NEXT: v_lshl_or_b32 v8, v14, 16, v15 +; GFX9-DL-NEXT: 
v_lshl_or_b32 v4, v12, 16, v4 ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) @@ -2529,57 +2529,57 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX10-DL-XNACK-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-DL-XNACK-NEXT: global_load_ushort v3, v0, s[0:1] ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(2) -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v13, 15, v2 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v1, v1, 4, 4 ; GFX10-DL-XNACK-NEXT: v_bfe_u32 v16, v2, 4, 4 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v11, v4, v11 +; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v12, 28, v2 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v14, v2, 24, 4 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v1, v4, v1 ; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v13, v4, v13 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v18, v2, 8, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v12, v2, 24, 4 -; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v14, 28, v2 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v1, v1, 16, v11 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v15, v2, 20, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v17, v2, 16, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v18, v2, 12, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v2, v2, 8, 4 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v1, v11, 16, v1 ; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v11, v16, 16, v13 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v15, v2, 16, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v17, v2, 20, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v2, v2, 12, 4 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v13, v4, v18 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v2, v4, v2 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v2, v2, 16, v13 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v2, v18, 16, v2 ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v10, v4, v15 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v10, v4, v17 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v1, v1, v8 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v8, v17, 16, v10 +; 
GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v8, v15, 16, v10 ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 16, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v3 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v3, v4, v5 -; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v5, 12, v8 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1] +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v4, v4, v14 ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v2, v9, v2 ; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v10 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v3, v6, 16, v3 -; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v4, v14, 16, v4 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v3, v5, 16, v3 +; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1] +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v2 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v2, 12, v3 op_sel_hi:[0,1] @@ -2617,57 +2617,57 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX10-DL-NOXNACK-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-DL-NOXNACK-NEXT: global_load_ushort v3, v2, s[0:1] ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(2) -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v13, 15, v0 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v1, v1, 4, 4 ; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v16, v0, 4, 4 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v11, v4, v11 +; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v12, 28, v0 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v14, v0, 24, 4 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v1, v4, v1 ; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v13, v4, v13 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v18, v0, 8, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v12, v0, 24, 4 -; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v14, 28, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v1, v1, 16, v11 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v15, v0, 20, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v17, v0, 16, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v18, v0, 12, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v0, v0, 8, 4 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v1, v11, 16, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v11, v16, 16, v13 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v15, v0, 16, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v17, v0, 20, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v0, v0, 12, 4 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v9, v4, v9 
-; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v13, v4, v18 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v0, v4, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v0, v0, 16, v13 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v0, v18, 16, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v10, v4, v15 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v10, v4, v17 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v0, 12, v0 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v1, v1, v8 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v8, v17, 16, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v8, v15, 16, v10 ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v0, 12, v0 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v10, 16, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v1, v1, v3 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v3, v4, v5 -; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v5, 12, v8 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1] +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v4, v4, v14 ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v0, v9, v0 ; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v1, v1, v10 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v3, v6, 16, v3 -; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v4, v14, 16, v4 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v3, v5, 16, v3 +; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1] +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v1, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v3 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll index e879fce3f4af..91a403ff983a 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll @@ -244,25 +244,25 @@ define amdgpu_kernel void @lds_ds_fmin(float addrspace(5)* %out, float addrspace ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; 
G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f32 v1, v2, v1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v1, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen ; G_GFX9-NEXT: s_endpgm @@ -533,25 +533,25 @@ define amdgpu_kernel void @lds_ds_fmax(float addrspace(5)* %out, float addrspace ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f32 v1, v2, v1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v1, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen ; G_GFX9-NEXT: s_endpgm @@ -800,23 +800,23 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; G_GFX7: ; %bb.0: ; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb +; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb ; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; G_GFX7-NEXT: s_mov_b32 s10, -1 ; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000 ; G_GFX7-NEXT: s_add_u32 s8, s8, s3 +; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX7-NEXT: s_mov_b32 s4, 0 +; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: s_add_i32 s2, s2, 4 -; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX7-NEXT: s_add_i32 s4, s4, 4 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 ; G_GFX7-NEXT: ds_min_rtn_f64 v[0:1], v4, 
v[0:1] ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -834,23 +834,23 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; G_VI: ; %bb.0: ; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; G_VI-NEXT: s_mov_b32 s90, -1 ; G_VI-NEXT: s_mov_b32 s91, 0xe80000 ; G_VI-NEXT: s_add_u32 s88, s88, s3 +; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 -; G_VI-NEXT: s_mov_b32 s4, 0 +; G_VI-NEXT: s_mov_b32 s3, 0x40450000 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) -; G_VI-NEXT: s_add_i32 s2, s2, 4 -; G_VI-NEXT: s_mov_b32 s5, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s4 -; G_VI-NEXT: s_lshl_b32 s3, s2, 3 -; G_VI-NEXT: v_mov_b32_e32 v1, s5 -; G_VI-NEXT: v_mov_b32_e32 v2, s3 +; G_VI-NEXT: s_add_i32 s4, s4, 4 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_VI-NEXT: s_lshl_b32 s2, s2, 4 +; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 ; G_VI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -868,28 +868,28 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; G_GFX9-NEXT: s_mov_b32 s0, 0 +; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v2, s1 +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 -; G_GFX9-NEXT: v_mov_b32_e32 v4, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v5, v[0:1] ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3] -; G_GFX9-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -897,31 +897,31 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; ; G_GFX10-LABEL: lds_ds_fmin_f64: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_clause 0x1 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; G_GFX10-NEXT: s_mov_b32 s10, -1 ; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 ; G_GFX10-NEXT: s_add_u32 s8, s8, s3 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dword 
s4, s[0:1], 0x2c +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX10-NEXT: s_mov_b32 s0, 0 ; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX10-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_add_i32 s2, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v5, s7 -; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX10-NEXT: s_add_i32 s4, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v5, s3 +; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3] -; G_GFX10-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -1143,23 +1143,23 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; G_GFX7: ; %bb.0: ; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb +; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb ; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; G_GFX7-NEXT: s_mov_b32 s10, -1 ; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000 ; G_GFX7-NEXT: s_add_u32 s8, s8, s3 +; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX7-NEXT: s_mov_b32 s4, 0 +; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: s_add_i32 s2, s2, 4 -; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX7-NEXT: s_add_i32 s4, s4, 4 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 ; G_GFX7-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -1177,23 +1177,23 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; G_VI: ; %bb.0: ; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; G_VI-NEXT: s_mov_b32 s90, -1 ; G_VI-NEXT: s_mov_b32 s91, 0xe80000 ; G_VI-NEXT: s_add_u32 s88, s88, s3 +; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 -; G_VI-NEXT: s_mov_b32 s4, 0 +; G_VI-NEXT: s_mov_b32 s3, 0x40450000 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) -; G_VI-NEXT: s_add_i32 s2, s2, 4 -; G_VI-NEXT: s_mov_b32 s5, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s4 -; G_VI-NEXT: s_lshl_b32 s3, s2, 3 -; G_VI-NEXT: v_mov_b32_e32 v1, s5 -; G_VI-NEXT: v_mov_b32_e32 v2, s3 +; G_VI-NEXT: s_add_i32 s4, s4, 4 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_max_rtn_f64 v[2:3], v2, 
v[0:1] -; G_VI-NEXT: s_lshl_b32 s2, s2, 4 +; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 ; G_VI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -1211,28 +1211,28 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; G_GFX9-NEXT: s_mov_b32 s0, 0 +; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v2, s1 +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 -; G_GFX9-NEXT: v_mov_b32_e32 v4, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v5, v[0:1] ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3] -; G_GFX9-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -1240,31 +1240,31 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; ; G_GFX10-LABEL: lds_ds_fmax_f64: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_clause 0x1 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; G_GFX10-NEXT: s_mov_b32 s10, -1 ; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 ; G_GFX10-NEXT: s_add_u32 s8, s8, s3 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX10-NEXT: s_mov_b32 s0, 0 ; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX10-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_add_i32 s2, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v5, s7 -; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX10-NEXT: s_add_i32 s4, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v5, s3 +; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3] -; G_GFX10-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 
offen offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir new file mode 100644 index 000000000000..9cbdafe7f97c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s + +--- +name: same_slot_agpr_sgpr +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: same_slot_agpr_sgpr + ; CHECK: liveins: $agpr0, $agpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5 + ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + $sgpr4_sgpr5 = IMPLICIT_DEF + SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... 
+--- +name: diff_slot_agpr_sgpr +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: diff_slot_agpr_sgpr + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + $sgpr4_sgpr5 = IMPLICIT_DEF + SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... +--- +name: dead_vgpr_slot +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: dead_vgpr_slot + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll index ae0c8b57b064..4c55eddcb4e7 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -13,15 +13,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* ; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]] ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4) - ; CHECK-NEXT: undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY killed %16 - ; CHECK-NEXT: [[COPY2]].sub2:sgpr_96_with_sub0_sub1 = COPY undef [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0 - ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1 - ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2 %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 ret <3 x i32> %load diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index 85dc9991bdaa..8d4d929e029d 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -179,13 +179,13 @@ define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-LABEL: shuffle_v4f16_35u5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dword v4, v[2:3], off ; GFX9-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX9-NEXT: global_load_dword v4, v[2:3], off ; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v4 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -194,13 +194,13 @@ define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -214,13 +214,13 @@ define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-LABEL: shuffle_v4f16_357u: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX9-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX9-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v5 ; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -229,13 +229,13 @@ define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -803,8 +803,8 @@ define <4 x half> @shuffle_v4f16_5734(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_and_b32_sdwa v1, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5 ; GFX9-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5 ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1 ; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -817,11 +817,11 @@ define <4 x half> @shuffle_v4f16_5734(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v5 -; GFX10-NEXT: v_and_b32_sdwa v2, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_b32_sdwa v1, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v5 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_sdwa v3, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v1 ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0 @@ -985,13 +985,13 @@ define <4 x half> @shuffle_v4f16_6161(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-LABEL: shuffle_v4f16_6161: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: global_load_dword v4, v[0:1], off -; GFX9-NEXT: global_load_dword v5, v[2:3], off offset:4 +; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX9-NEXT: global_load_dword v5, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v4 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v5 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -999,13 +999,13 @@ define <4 x half> @shuffle_v4f16_6161(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dword v4, v[0:1], off -; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4 +; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX10-NEXT: global_load_dword v5, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v5 -; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0 diff --git a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 92dccdb77194..31f3da743054 100644 --- a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -10,12 +10,12 @@ target triple = "thumbv7-apple-darwin10" @x4 = internal global i8 1, align 1, !dbg !8 @x5 = global i8 1, align 1, !dbg !10 -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x1" ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr [[ADDR:0x[0-9a-fA-F]+]]) -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x2" ; CHECK-NOT: {{DW_TAG|NULL}} diff --git a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index d913a16eb93a..ac229419a10f 100644 --- a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -1,11 +1,11 @@ ; RUN: llc -arm-global-merge -global-merge-group-by-use=false -filetype=obj < %s | llvm-dwarfdump -debug-info -v - | FileCheck %s -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x1" ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr [[ADDR:0x[0-9a-fA-F]+]]) -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x2" ; CHECK-NOT: {{DW_TAG|NULL}} diff --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll new file mode 100644 index 000000000000..b63b142793ee --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc-aix- < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-- < %s | \ +; RUN: FileCheck 
%s --check-prefix=CHECK_LE +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK_P8LE +define i1 @foo(<2 x i64> %a) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 35, 34 +; CHECK-NEXT: lwz 3, L..C0(2) # %const.0 +; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: lxvw4x 35, 0, 3 +; CHECK-NEXT: addi 3, 1, -16 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 0, 35, 34 +; CHECK-NEXT: stxvw4x 0, 0, 3 +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: blr +; +; CHECK_LE-LABEL: foo: +; CHECK_LE: # %bb.0: # %entry +; CHECK_LE-NEXT: xxswapd 35, 34 +; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: addi 3, 1, -16 +; CHECK_LE-NEXT: vperm 3, 2, 2, 3 +; CHECK_LE-NEXT: xxland 34, 35, 34 +; CHECK_LE-NEXT: stvx 2, 0, 3 +; CHECK_LE-NEXT: ld 3, -16(1) +; CHECK_LE-NEXT: blr +; +; CHECK_P8LE-LABEL: foo: +; CHECK_P8LE: # %bb.0: # %entry +; CHECK_P8LE-NEXT: xxswapd 35, 34 +; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 +; CHECK_P8LE-NEXT: xxswapd 0, 34 +; CHECK_P8LE-NEXT: mffprd 3, 0 +; CHECK_P8LE-NEXT: blr +entry: + %0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> + %1 = icmp eq <2 x i64> %a, %0 + %2 = extractelement <2 x i1> %1, i32 0 + ret i1 %2 +} diff --git a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll index 727c2d328406..3a7bfcfb19f1 100644 --- a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll +++ b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll @@ -10,11 +10,21 @@ define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone { ; CHECK-LABEL: v2si64_cmp: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 34, 35, 34 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: v2si64_cmp: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: vcmpequw 2, 2, 3 +; CHECK-BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvw4x 35, 0, 3 +; CHECK-BE-NEXT: vperm 3, 2, 2, 3 +; CHECK-BE-NEXT: xxland 34, 35, 34 ; CHECK-BE-NEXT: blr %cmp = icmp eq <2 x i64> %x, %y %result = sext <2 x i1> %cmp to <2 x i64> diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll index 0bcf03450b5d..eee3ff42bd13 100644 --- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll @@ -11979,9 +11979,14 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { ; PWR7-NEXT: std 3, -16(1) ; PWR7-NEXT: addi 3, 1, -16 ; PWR7-NEXT: lxvw4x 0, 0, 3 +; PWR7-NEXT: addis 3, 2, .LCPI100_0@toc@ha +; PWR7-NEXT: addi 3, 3, .LCPI100_0@toc@l ; PWR7-NEXT: xxland 34, 34, 0 ; PWR7-NEXT: vcmpequw 2, 2, 3 +; PWR7-NEXT: lxvw4x 35, 0, 3 ; PWR7-NEXT: xxlnor 34, 34, 34 +; PWR7-NEXT: vperm 3, 2, 2, 3 +; PWR7-NEXT: xxland 34, 35, 34 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ugt_1_v2i64: @@ -12045,8 +12050,13 @@ define <2 x i64> @ult_2_v2i64(<2 x i64> %0) { ; PWR7-NEXT: std 3, -16(1) ; PWR7-NEXT: addi 3, 1, -16 ; PWR7-NEXT: lxvw4x 0, 0, 3 +; PWR7-NEXT: addis 3, 2, .LCPI101_0@toc@ha +; PWR7-NEXT: addi 3, 3, .LCPI101_0@toc@l ; PWR7-NEXT: xxland 34, 34, 0 ; PWR7-NEXT: vcmpequw 2, 2, 3 +; PWR7-NEXT: lxvw4x 35, 0, 3 +; PWR7-NEXT: vperm 3, 2, 2, 3 +; PWR7-NEXT: xxland 34, 35, 34 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ult_2_v2i64: diff --git 
a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index a62f006eaeb1..d882050b2e5a 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -2032,16 +2032,31 @@ define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test65: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpequw v2, v2, v3 +; CHECK-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI59_0@toc@l +; CHECK-NEXT: lxvw4x v3, 0, r3 +; CHECK-NEXT: vperm v3, v2, v2, v3 +; CHECK-NEXT: xxland v2, v3, v2 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test65: ; CHECK-REG: # %bb.0: ; CHECK-REG-NEXT: vcmpequw v2, v2, v3 +; CHECK-REG-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; CHECK-REG-NEXT: addi r3, r3, .LCPI59_0@toc@l +; CHECK-REG-NEXT: lxvw4x v3, 0, r3 +; CHECK-REG-NEXT: vperm v3, v2, v2, v3 +; CHECK-REG-NEXT: xxland v2, v3, v2 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test65: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: vcmpequw v2, v2, v3 +; CHECK-FISL-NEXT: vcmpequw v3, v2, v3 +; CHECK-FISL-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; CHECK-FISL-NEXT: addi r3, r3, .LCPI59_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vperm v2, v3, v3, v2 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test65: @@ -2059,19 +2074,34 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test66: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpequw v2, v2, v3 +; CHECK-NEXT: addis r3, r2, .LCPI60_0@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI60_0@toc@l +; CHECK-NEXT: lxvw4x v3, 0, r3 ; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: vperm v3, v2, v2, v3 +; CHECK-NEXT: xxland v2, v3, v2 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test66: ; CHECK-REG: # %bb.0: ; CHECK-REG-NEXT: vcmpequw v2, v2, v3 +; CHECK-REG-NEXT: addis r3, r2, .LCPI60_0@toc@ha +; CHECK-REG-NEXT: addi r3, r3, .LCPI60_0@toc@l +; CHECK-REG-NEXT: lxvw4x v3, 0, r3 ; CHECK-REG-NEXT: xxlnor v2, v2, v2 +; CHECK-REG-NEXT: vperm v3, v2, v2, v3 +; CHECK-REG-NEXT: xxland v2, v3, v2 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test66: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: vcmpequw v2, v2, v3 -; CHECK-FISL-NEXT: xxlnor v2, v2, v2 +; CHECK-FISL-NEXT: xxlnor v3, v2, v2 +; CHECK-FISL-NEXT: addis r3, r2, .LCPI60_0@toc@ha +; CHECK-FISL-NEXT: addi r3, r3, .LCPI60_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vperm v2, v3, v3, v2 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test66: diff --git a/llvm/test/CodeGen/RISCV/double-convert-strict.ll b/llvm/test/CodeGen/RISCV/double-convert-strict.ll new file mode 100644 index 000000000000..1d300697ca5c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/double-convert-strict.ll @@ -0,0 +1,850 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IFD %s +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IFD %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64I %s + +; NOTE: The rounding mode metadata does not effect which instruction is +; selected. Dynamic rounding mode is always used for operations that +; support rounding mode. 
+ +define float @fcvt_s_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_s_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.s.d ft0, ft0 +; RV32IFD-NEXT: fmv.x.w a0, ft0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_s_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.s.d ft0, ft0 +; RV64IFD-NEXT: fmv.x.w a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_s_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __truncdfsf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __truncdfsf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) + +define double @fcvt_d_s(float %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_s: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fmv.w.x ft0, a0 +; RV32IFD-NEXT: fcvt.d.s ft0, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_s: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.w.x ft0, a0 +; RV64IFD-NEXT: fcvt.d.s ft0, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __extendsfdf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __extendsfdf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.fpext.f64.f32(float %a, metadata !"fpexcept.strict") + ret double %1 +} +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) + +define i32 @fcvt_w_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_w_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_w_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_w_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 
16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) + +; For RV64D, fcvt.lu.d is semantically equivalent to fcvt.wu.d in this case +; because fptosi will produce poison if the result doesn't fit into an i32. +define i32 @fcvt_wu_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_wu_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. +define i32 @fcvt_wu_d_multiple_use(double %x, i32* %y) { +; RV32IFD-LABEL: fcvt_wu_d_multiple_use: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.wu.d a1, ft0, rtz +; RV32IFD-NEXT: li a0, 1 +; RV32IFD-NEXT: beqz a1, .LBB4_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: mv a0, a1 +; RV32IFD-NEXT: .LBB4_2: +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d_multiple_use: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.wu.d a1, ft0, rtz +; RV64IFD-NEXT: li a0, 1 +; RV64IFD-NEXT: beqz a1, .LBB4_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: mv a0, a1 +; RV64IFD-NEXT: .LBB4_2: +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: beqz a1, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: call __fixunsdfsi@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call i32 
@llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define double @fcvt_d_w(i32 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) + +define double @fcvt_d_w_load(i32* %p) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lw a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @fcvt_d_wu(i32 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu: +; RV64I: # %bb.0: +; 
RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) + +define double @fcvt_d_wu_load(i32* %p) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lwu a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define i64 @fcvt_l_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_l_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_l_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_l_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) + +define i64 @fcvt_lu_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_lu_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_lu_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_d: 
+; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) + +define double @fcvt_d_l(i64 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_l: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __floatdidf@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_l: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) + +define double @fcvt_d_lu(i64 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_lu: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __floatundidf@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_lu: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.lu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_lu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) + +define double @fcvt_d_w_i8(i8 signext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_i8: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; 
RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_i8: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata) + +define double @fcvt_d_wu_i8(i8 zeroext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_i8: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_i8: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata) + +define double @fcvt_d_w_i16(i16 signext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_i16: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_i16: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata) + +define double @fcvt_d_wu_i16(i16 zeroext %a) nounwind strictfp { +; 
RV32IFD-LABEL: fcvt_d_wu_i16: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_i16: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, double* %1) { +; RV32IFD-LABEL: fcvt_d_w_demanded_bits: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi a0, a0, 1 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 0(a1) +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_demanded_bits: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addiw a0, a0, 1 +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fsd ft0, 0(a1) +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store double %4, double* %1, align 8 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. 
+define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, double* %1) { +; RV32IFD-LABEL: fcvt_d_wu_demanded_bits: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi a0, a0, 1 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 0(a1) +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_demanded_bits: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addiw a0, a0, 1 +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fsd ft0, 0(a1) +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store double %4, double* %1, align 8 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/float-convert-strict.ll b/llvm/test/CodeGen/RISCV/float-convert-strict.ll new file mode 100644 index 000000000000..bd0ab2249fb8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-convert-strict.ll @@ -0,0 +1,719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IF %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64I %s + +; NOTE: The rounding mode metadata does not effect which instruction is +; selected. Dynamic rounding mode is always used for operations that +; support rounding mode. 
+ +define i32 @fcvt_w_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_w_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_w_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) + +define i32 @fcvt_wu_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_wu_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. 
+define i32 @fcvt_wu_s_multiple_use(float %x, i32* %y) { +; RV32IF-LABEL: fcvt_wu_s_multiple_use: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV32IF-NEXT: li a0, 1 +; RV32IF-NEXT: beqz a1, .LBB2_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: .LBB2_2: +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s_multiple_use: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV64IF-NEXT: li a0, 1 +; RV64IF-NEXT: beqz a1, .LBB2_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: .LBB2_2: +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: beqz a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: call __fixunssfsi@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define float @fcvt_s_w(i32 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) + +define float @fcvt_s_w_load(i32* %p) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lw a0, 0(a0) +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call 
__floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @fcvt_s_wu(i32 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata) + +define float @fcvt_s_wu_load(i32* %p) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lwu a0, 0(a0) +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define i64 @fcvt_l_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_l_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_l_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_l_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call 
__fixsfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) + +define i64 @fcvt_lu_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_lu_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_lu_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) + +define float @fcvt_s_l(i64 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_l: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __floatdisf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_l: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata) + +define float @fcvt_s_lu(i64 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_lu: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __floatundisf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_lu: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.lu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_lu: +; RV32I: # 
%bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata) + +define float @fcvt_s_w_i8(i8 signext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_i8: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_i8: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata) + +define float @fcvt_s_wu_i8(i8 zeroext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_i8: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_i8: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata) + +define float @fcvt_s_w_i16(i16 signext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_i16: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_i16: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: 
addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata) + +define float @fcvt_s_wu_i16(i16 zeroext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_i16: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_i16: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_s_w_demanded_bits(i32 signext %0, float* %1) { +; RV32IF-LABEL: fcvt_s_w_demanded_bits: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a0, a0, 1 +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsw ft0, 0(a1) +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_demanded_bits: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addiw a0, a0, 1 +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(a1) +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld 
s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store float %4, float* %1, align 4 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, float* %1) { +; RV32IF-LABEL: fcvt_s_wu_demanded_bits: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a0, a0, 1 +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fsw ft0, 0(a1) +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_demanded_bits: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addiw a0, a0, 1 +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(a1) +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store float %4, float* %1, align 4 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll new file mode 100644 index 000000000000..bb8771ea2d07 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll @@ -0,0 +1,712 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV32IDZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: 
-target-abi lp64d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV64IDZFH %s + +; NOTE: The rounding mode metadata does not effect which instruction is +; selected. Dynamic rounding mode is always used for operations that +; support rounding mode. + +define i16 @fcvt_si_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_si_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_si_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_si_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_si_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i16 %1 +} +declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata) + +define i16 @fcvt_ui_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_ui_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_ui_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_ui_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_ui_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i16 %1 +} +declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata) + +define i32 @fcvt_w_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_w_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_w_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_w_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_w_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define i32 @fcvt_wu_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_wu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. +; FIXME: We should not have an fcvt.wu.h and an fcvt.lu.h. 
+define i32 @fcvt_wu_h_multiple_use(half %x, i32* %y) { +; RV32IZFH-LABEL: fcvt_wu_h_multiple_use: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV32IZFH-NEXT: li a0, 1 +; RV32IZFH-NEXT: beqz a1, .LBB4_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: mv a0, a1 +; RV32IZFH-NEXT: .LBB4_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h_multiple_use: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV64IZFH-NEXT: li a0, 1 +; RV64IZFH-NEXT: beqz a1, .LBB4_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: mv a0, a1 +; RV64IZFH-NEXT: .LBB4_2: +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h_multiple_use: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV32IDZFH-NEXT: li a0, 1 +; RV32IDZFH-NEXT: beqz a1, .LBB4_2 +; RV32IDZFH-NEXT: # %bb.1: +; RV32IDZFH-NEXT: mv a0, a1 +; RV32IDZFH-NEXT: .LBB4_2: +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h_multiple_use: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV64IDZFH-NEXT: li a0, 1 +; RV64IDZFH-NEXT: beqz a1, .LBB4_2 +; RV64IDZFH-NEXT: # %bb.1: +; RV64IDZFH-NEXT: mv a0, a1 +; RV64IDZFH-NEXT: .LBB4_2: +; RV64IDZFH-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define i64 @fcvt_l_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_l_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __fixhfdi@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_l_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_l_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __fixhfdi@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_l_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata) + +define i64 @fcvt_lu_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_lu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __fixunshfdi@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_lu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_lu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __fixunshfdi@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_lu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) + +define half @fcvt_h_si(i16 %a) 
nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_si: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: slli a0, a0, 16 +; RV32IZFH-NEXT: srai a0, a0, 16 +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_si: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: slli a0, a0, 48 +; RV64IZFH-NEXT: srai a0, a0, 48 +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: slli a0, a0, 16 +; RV32IDZFH-NEXT: srai a0, a0, 16 +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: slli a0, a0, 48 +; RV64IDZFH-NEXT: srai a0, a0, 48 +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata) + +define half @fcvt_h_si_signext(i16 signext %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_si_signext: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_si_signext: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si_signext: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si_signext: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_ui(i16 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_ui: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a1, 16 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a0, a1 +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a1, 16 +; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lui a1, 16 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a0, a1 +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lui a1, 16 +; RV64IDZFH-NEXT: addiw a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a0, a1 +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata) + +define half @fcvt_h_ui_zeroext(i16 zeroext %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_ui_zeroext: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui_zeroext: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui_zeroext: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui_zeroext: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + 
+define half @fcvt_h_w(i32 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_w: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) + +define half @fcvt_h_w_load(i32* %p) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_w_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lw a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lw a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_wu(i32 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_wu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata) + +define half @fcvt_h_wu_load(i32* %p) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_wu_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lwu a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lwu a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_l(i64 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_l: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __floatdihf@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_l: +; RV64IZFH: # 
%bb.0: +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_l: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __floatdihf@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_l: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata) + +define half @fcvt_h_lu(i64 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_lu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __floatundihf@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_lu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_lu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __floatundihf@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_lu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata) + +define half @fcvt_h_s(float %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_s: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_s: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %1 +} +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) + +define float @fcvt_s_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_s_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_s_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_s_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_s_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call float @llvm.experimental.constrained.fpext.f32.f16(half %a, metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) + +define half @fcvt_h_d(double %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_d: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __truncdfhf2@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_d: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: call __truncdfhf2@plt +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_d: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_d: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %1 +} +declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) + +define double @fcvt_d_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_d_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call __extendsfdf2@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_d_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call __extendsfdf2@plt +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_d_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_d_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call double @llvm.experimental.constrained.fpext.f64.f16(half %a, metadata !"fpexcept.strict") + ret double %1 +} +declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, half* %1) { +; RV32IZFH-LABEL: fcvt_h_w_demanded_bits: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, a0, 1 +; RV32IZFH-NEXT: fcvt.h.w ft0, a0 +; RV32IZFH-NEXT: fsh ft0, 0(a1) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_demanded_bits: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addiw a0, a0, 1 +; RV64IZFH-NEXT: fcvt.h.w ft0, a0 +; RV64IZFH-NEXT: fsh ft0, 0(a1) +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_demanded_bits: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi a0, a0, 1 +; RV32IDZFH-NEXT: fcvt.h.w ft0, a0 +; RV32IDZFH-NEXT: fsh ft0, 0(a1) +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_demanded_bits: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: addiw a0, a0, 1 +; RV64IDZFH-NEXT: fcvt.h.w ft0, a0 +; RV64IDZFH-NEXT: fsh ft0, 0(a1) +; RV64IDZFH-NEXT: ret + %3 = add i32 %0, 1 + %4 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store half %4, half* %1, align 2 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. 
+define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, half* %1) { +; RV32IZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, a0, 1 +; RV32IZFH-NEXT: fcvt.h.wu ft0, a0 +; RV32IZFH-NEXT: fsh ft0, 0(a1) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addiw a0, a0, 1 +; RV64IZFH-NEXT: fcvt.h.wu ft0, a0 +; RV64IZFH-NEXT: fsh ft0, 0(a1) +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi a0, a0, 1 +; RV32IDZFH-NEXT: fcvt.h.wu ft0, a0 +; RV32IDZFH-NEXT: fsh ft0, 0(a1) +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: addiw a0, a0, 1 +; RV64IDZFH-NEXT: fcvt.h.wu ft0, a0 +; RV64IDZFH-NEXT: fsh ft0, 0(a1) +; RV64IDZFH-NEXT: ret + %3 = add i32 %0, 1 + %4 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store half %4, half* %1, align 2 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll b/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll index 20ac5ef11111..14b7cb896674 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll @@ -30,3 +30,15 @@ define void @constraint_f() nounwind { tail call void asm "fadd.d fa0, fa0, $0", "f"(double 0.0) ret void } + +define void @constraint_r_fixed_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'r' + tail call void asm "add a0, a0, $0", "r"(<4 x i32> zeroinitializer) + ret void +} + +define void @constraint_r_scalable_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'r' + tail call void asm "add a0, a0, $0", "r"(<vscale x 4 x i32> zeroinitializer) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll b/llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll new file mode 100644 index 000000000000..48e39a2593b7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s + +; This test verifies that a repeated store is not eliminated with optnone (improves debugging).
+ +define void @foo(i32* %p) noinline optnone { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + store i32 8, i32* %p, align 4 + store i32 8, i32* %p, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll index b2994fcdaf81..452de04563f0 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll @@ -568,8 +568,7 @@ define i1 @andn_seqz_i32(i32 %a, i32 %b) nounwind { ; ; RV32ZBB-LABEL: andn_seqz_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a1 -; RV32ZBB-NEXT: xor a0, a0, a1 +; RV32ZBB-NEXT: andn a0, a1, a0 ; RV32ZBB-NEXT: seqz a0, a0 ; RV32ZBB-NEXT: ret ; @@ -625,8 +624,7 @@ define i1 @andn_snez_i32(i32 %a, i32 %b) nounwind { ; ; RV32ZBB-LABEL: andn_snez_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a1 -; RV32ZBB-NEXT: xor a0, a0, a1 +; RV32ZBB-NEXT: andn a0, a1, a0 ; RV32ZBB-NEXT: snez a0, a0 ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll new file mode 100644 index 000000000000..cc8c1cfdceb3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64ID + +; This file exhaustively checks double<->i32 conversions. In general, +; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. + +define i32 @aext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: aext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) + +define signext i32 @sext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: sext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: zext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: slli a0, a0, 32 +; RV64ID-NEXT: srli a0, a0, 32 +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: aext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +define signext i32 @sext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: sext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 
@llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: zext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define double @uitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_aext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata, metadata) + +define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_sext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_zext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @sitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_aext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata, metadata) + +define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_sext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_zext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll new file mode 100644 index 000000000000..466c15cbdf38 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64IF + +; This file exhaustively checks float<->i32 conversions. 
In general, +; fcvt.l[u].s can be selected instead of fcvt.w[u].s because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. + +define i32 @aext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: aext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) + +define signext i32 @sext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: sext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: zext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: slli a0, a0, 32 +; RV64IF-NEXT: srli a0, a0, 32 +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: aext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +define signext i32 @sext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: sext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: zext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define float @uitofp_aext_i32_to_f32(i32 %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_aext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata) + +define float @uitofp_sext_i32_to_f32(i32 signext %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_sext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @uitofp_zext_i32_to_f32(i32 zeroext %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_zext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") 
strictfp + ret float %1 +} + +define float @sitofp_aext_i32_to_f32(i32 %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_aext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) + +define float @sitofp_sext_i32_to_f32(i32 signext %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_sext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @sitofp_zext_i32_to_f32(i32 zeroext %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_zext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll new file mode 100644 index 000000000000..cd9eed25a85a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s | \ +; RUN: FileCheck %s -check-prefix=RV64IZFH + +; This file exhaustively checks half<->i32 conversions. In general, +; fcvt.l[u].h can be selected instead of fcvt.w[u].h because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. 
+ +define i32 @aext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define signext i32 @sext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: slli a0, a0, 32 +; RV64IZFH-NEXT: srli a0, a0, 32 +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +define signext i32 @sext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define half @uitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: uitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata, metadata) + +define half @uitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFH-LABEL: uitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @uitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: uitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: sitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata, metadata) + +define half @sitofp_sext_i32_to_f16(i32 
signext %a) nounwind { +; RV64IZFH-LABEL: sitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: sitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll index be08a3ee8b1f..dc9c37b2ba6c 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll @@ -639,8 +639,7 @@ define i1 @andn_seqz_i32(i32 signext %a, i32 signext %b) nounwind { ; ; RV64ZBB-LABEL: andn_seqz_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: seqz a0, a0 ; RV64ZBB-NEXT: ret ; @@ -665,8 +664,7 @@ define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind { ; ; RV64ZBB-LABEL: andn_seqz_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: seqz a0, a0 ; RV64ZBB-NEXT: ret ; @@ -691,8 +689,7 @@ define i1 @andn_snez_i32(i32 signext %a, i32 signext %b) nounwind { ; ; RV64ZBB-LABEL: andn_snez_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: snez a0, a0 ; RV64ZBB-NEXT: ret ; @@ -717,8 +714,7 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind { ; ; RV64ZBB-LABEL: andn_snez_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: snez a0, a0 ; RV64ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll new file mode 100644 index 000000000000..632fe1929512 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll @@ -0,0 +1,269 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @and_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.and.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.and.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @and_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.and.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.and.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @and_v4i1(<4 x i1> %b, 
<4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @and_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.and.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @and_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.and.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare <1 x i1> @llvm.vp.or.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @or_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.or.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.or.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @or_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.or.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.or.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @or_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.or.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.or.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @or_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.or.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.or.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @or_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.or.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare <1 x i1> @llvm.vp.xor.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @xor_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = 
call <1 x i1> @llvm.vp.xor.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.xor.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @xor_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.xor.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.xor.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @xor_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.xor.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.xor.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @xor_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.xor.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.xor.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @xor_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.xor.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i1> @xor_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl) + ret <vscale x 1 x i1> %v +} + +declare <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i1> @xor_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl) + ret <vscale x 2 x i1> %v +} + +declare <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i1> @xor_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl) + ret <vscale x 4 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @xor_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i1> @xor_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl) + ret <vscale x 16 x i1> %v +} + +declare <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i1> @xor_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta,
mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl) + ret <vscale x 32 x i1> %v +} + +declare <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i1> @xor_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl) + ret <vscale x 64 x i1> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 2d3c3a2e18e4..9217101c946c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1408,3 +1408,20 @@ define double @vreduce_fmax_v32f64(<32 x double>* %x) { %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v) ret double %red } + +define float @vreduce_nsz_fadd_v4f32(<4 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_nsz_fadd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfredusum.vs v8, v8, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <4 x float>, <4 x float>* %x + %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) + ret float %red +} diff --git a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll new file mode 100644 index 000000000000..afd62aebf0a4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll @@ -0,0 +1,437 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @and_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.and.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.and.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @and_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.and.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.and.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @and_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @and_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v8i1: +; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.and.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @and_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.and.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare <vscale x 1 x i1> @llvm.vp.and.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i1> @and_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.and.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl) + ret <vscale x 1 x i1> %v +} + +declare <vscale x 2 x i1> @llvm.vp.and.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i1> @and_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i1> @llvm.vp.and.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl) + ret <vscale x 2 x i1> %v +} + +declare <vscale x 4 x i1> @llvm.vp.and.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i1> @and_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i1> @llvm.vp.and.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl) + ret <vscale x 4 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.and.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @and_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.and.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare <vscale x 16 x i1> @llvm.vp.and.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i1> @and_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i1> @llvm.vp.and.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl) + ret <vscale x 16 x i1> %v +} + +declare <vscale x 32 x i1> @llvm.vp.and.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i1> @and_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i1> @llvm.vp.and.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl) + ret <vscale x 32 x i1> %v +} + +declare <vscale x 64 x i1> @llvm.vp.and.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i1> @and_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 64 x i1> @llvm.vp.and.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl) + ret <vscale x 64 x i1> %v +} + +declare <1 x i1> @llvm.vp.or.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @or_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.or.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.or.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @or_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +;
CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.or.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.or.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @or_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.or.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.or.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @or_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.or.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.or.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @or_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.or.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare <vscale x 1 x i1> @llvm.vp.or.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i1> @or_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.or.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl) + ret <vscale x 1 x i1> %v +} + +declare <vscale x 2 x i1> @llvm.vp.or.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i1> @or_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i1> @llvm.vp.or.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl) + ret <vscale x 2 x i1> %v +} + +declare <vscale x 4 x i1> @llvm.vp.or.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i1> @or_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i1> @llvm.vp.or.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl) + ret <vscale x 4 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.or.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @or_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.or.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare <vscale x 16 x i1> @llvm.vp.or.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i1> @or_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i1> @llvm.vp.or.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl) + ret <vscale x 16 x i1> %v +} + +declare <vscale x 32 x i1> @llvm.vp.or.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i1> @or_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i1> @llvm.vp.or.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl) + ret <vscale x 32 x i1> %v +} + +declare <vscale x 64 x i1> @llvm.vp.or.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i1> @or_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 64 x i1> @llvm.vp.or.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl) + ret <vscale x 64 x i1> %v +} +
+declare <1 x i1> @llvm.vp.xor.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @xor_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.xor.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.xor.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @xor_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.xor.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.xor.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @xor_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.xor.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.xor.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @xor_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.xor.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.xor.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @xor_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.xor.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl)
+  ret <16 x i1> %v
+}
+
+declare <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @xor_nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.xor.nxv1i1(<vscale x 1 x i1> %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %a, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @xor_nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.xor.nxv2i1(<vscale x 2 x i1> %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %a, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i1> @xor_nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i1> @llvm.vp.xor.nxv4i1(<vscale x 4 x i1> %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %a, i32 %evl)
+  ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @xor_nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i1> @llvm.vp.xor.nxv8i1(<vscale x 8 x i1> %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %a, i32 %evl)
+  ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i1> @xor_nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i1> @llvm.vp.xor.nxv16i1(<vscale x 16 x i1> %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %a, i32 %evl)
+  ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @xor_nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.xor.nxv32i1(<vscale x 32 x i1> %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %a, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @xor_nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 zeroext %evl) {
+; CHECK-LABEL: xor_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.xor.nxv64i1(<vscale x 64 x i1> %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %a, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index ce802a00163a..8404398fdc85 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -882,3 +882,17 @@ define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
   ret double %red
 }
+
+define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
+; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
+; CHECK-NEXT:    ret
+  %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  ret float %red
+}
diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
index c95df7bcd67b..ef287b959427 100644
--- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
@@ -328,14 +328,12 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
 ;
 ; X64-AVX2-LABEL: rotr_v4i32:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
 ; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
 ; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X64-AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
+; X64-AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2
 ; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
 ; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
 ; X64-AVX2-NEXT:    retq
   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
diff --git a/llvm/test/CodeGen/X86/pr15296.ll b/llvm/test/CodeGen/X86/pr15296.ll
index 71034f696429..8476b765dbe2 100644
--- a/llvm/test/CodeGen/X86/pr15296.ll
+++ b/llvm/test/CodeGen/X86/pr15296.ll
@@ -27,27 +27,11 @@ define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i
 ; CHECK-LABEL: shiftInput___canonical:
 ; CHECK:       # %bb.0: # %allocas
 ; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; CHECK-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT:    vpsrld %xmm2, %xmm3, %xmm4
-; CHECK-NEXT:    vpsrlq $32, %xmm1, %xmm5
-; CHECK-NEXT:    vpsrld %xmm5, %xmm3, %xmm6
-; CHECK-NEXT:    vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7]
-; CHECK-NEXT:    vpxor %xmm6, %xmm6, %xmm6
-; CHECK-NEXT:    vpblendw {{.*#+}} xmm6 = xmm1[0,1],xmm6[2,3,4,5,6,7]
-; CHECK-NEXT:    vpsrld
%xmm6, %xmm3, %xmm7 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero -; CHECK-NEXT: vpsrld %xmm1, %xmm3, %xmm3 -; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7] -; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] -; CHECK-NEXT: vpsrld %xmm2, %xmm0, %xmm2 -; CHECK-NEXT: vpsrld %xmm5, %xmm0, %xmm4 -; CHECK-NEXT: vpsrld %xmm6, %xmm0, %xmm5 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 +; CHECK-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm2[4,5,6,7] -; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm5[4,5,6,7] -; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; CHECK-NEXT: retl allocas: %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index df579191c87f..0e8cceb4db3f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512BW-LABEL: var_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_funnnel_v32i16: @@ -334,30 +330,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 -; 
AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 477f25902189..612c8103d4b1 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -232,14 +232,12 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; @@ -386,36 +384,32 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: var_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512F-NEXT: vpsllvd %ymm2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlvd %ymm2, %ymm0, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512F-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -424,16 +418,14 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; ; AVX512VL-LABEL: var_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VL-NEXT: vpsllvd %ymm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlvd %ymm2, %ymm0, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 ; AVX512VL-NEXT: vzeroupper @@ -442,26 +434,22 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; AVX512BW-LABEL: var_funnnel_v8i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def 
$zmm0 -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; @@ -975,103 +963,89 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: psubw %xmm1, %xmm2 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] -; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psllw %xmm1, %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; SSE2-NEXT: psubw %xmm2, %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psrlw %xmm1, %xmm0 +; SSE2-NEXT: psrlw %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] +; SSE2-NEXT: psubw %xmm1, %xmm2 +; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE2-NEXT: psllw %xmm2, %xmm0 ; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: psrlw %xmm2, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] ; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: psllw %xmm1, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm2, %xmm1 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; SSE41-NEXT: psrlw %xmm1, %xmm0 +; SSE41-NEXT: psllw %xmm1, %xmm0 ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: 
splatvar_funnnel_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512F-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VL-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; 
AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; @@ -1109,18 +1083,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v8i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pxor %xmm2, %xmm2 -; X86-SSE2-NEXT: psubw %xmm1, %xmm2 -; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] -; X86-SSE2-NEXT: pand %xmm2, %xmm1 +; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; X86-SSE2-NEXT: pand %xmm1, %xmm2 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 -; X86-SSE2-NEXT: psllw %xmm1, %xmm3 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; X86-SSE2-NEXT: psubw %xmm2, %xmm1 -; X86-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X86-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X86-SSE2-NEXT: psrlw %xmm1, %xmm0 +; X86-SSE2-NEXT: psrlw %xmm2, %xmm3 +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] +; X86-SSE2-NEXT: psubw %xmm1, %xmm2 +; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] +; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-SSE2-NEXT: psllw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm3, %xmm0 ; X86-SSE2-NEXT: retl %splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index c760469b20b4..2093a2870cd1 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -171,14 +171,12 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31] ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32] ; AVX2-NEXT: vpsubd %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; @@ -290,23 +288,22 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] -; AVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] -; AVX2-NEXT: vpsllvd %ymm4, %ymm3, %ymm4 +; AVX2-NEXT: vpsrlvd %ymm4, %ymm3, %ymm4 ; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; 
AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] -; AVX2-NEXT: vpsllvd %ymm5, %ymm0, %ymm5 +; AVX2-NEXT: vpsrlvd %ymm5, %ymm0, %ymm5 ; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5 ; AVX2-NEXT: vpackusdw %ymm4, %ymm5, %ymm4 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: vpsubw %ymm1, %ymm5, %ymm1 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] -; AVX2-NEXT: vpsrlvd %ymm5, %ymm3, %ymm3 +; AVX2-NEXT: vpsllvd %ymm5, %ymm3, %ymm3 ; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] -; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 ; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm4, %ymm0 @@ -314,32 +311,28 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; ; AVX512F-LABEL: var_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %ymm1, %ymm3, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: var_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %ymm1, %ymm3, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512VL-NEXT: retq @@ -347,25 +340,21 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX512BW-LABEL: var_funnnel_v16i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; @@ -801,90 +790,78 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX1-NEXT: vpsrlw %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] ; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3 -; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: 
vpsllw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; 
AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index ce177f1a8a81..d7ace82e7f08 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -36,29 +36,26 @@ define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind { define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: var_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm5, %zmm6, %zmm5 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %ymm3, %ymm7, %ymm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpsrlvd %zmm4, %zmm5, %zmm4 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %ymm3, %ymm6, %ymm3 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsrlvd %zmm3, %zmm6, %zmm3 -; AVX512F-NEXT: vpord %zmm3, %zmm5, %zmm3 +; AVX512F-NEXT: vpsllvd %zmm3, %zmm5, %zmm3 +; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm7, %ymm1 +; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 @@ -66,29 +63,26 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512VL-LABEL: var_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm5, %zmm6, %zmm5 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %ymm3, %ymm7, %ymm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm3 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm5, %zmm4 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %ymm3, %ymm6, %ymm3 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm6, %zmm3 -; AVX512VL-NEXT: vpord %zmm3, %zmm5, %zmm3 +; AVX512VL-NEXT: vpsllvd %zmm3, %zmm5, %zmm3 +; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3 ; AVX512VL-NEXT: 
vpmovdw %zmm3, %ymm3 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm7, %ymm1 +; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 @@ -96,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512BW-LABEL: var_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_funnnel_v32i16: @@ -308,68 +298,60 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> 
%amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm3, %ymm2, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm2, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: 
vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: @@ -546,15 +528,15 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind { ; ; AVX512BW-LABEL: constant_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; @@ -716,15 +698,15 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1 +; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index 88975d76af98..23e6f2f8c77b 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -83,14 +83,12 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v2i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, 
%xmm1, %xmm1 -; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index a54988c0870d..4427d3b2c79f 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -91,20 +91,22 @@ define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; ; AVX512BW-LABEL: var_rotate_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_rotate_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_rotate_v32i16: @@ -341,22 +343,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind ; ; AVX512BW-LABEL: splatvar_rotate_v32i16: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v32i16: ; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; 
AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll index 0d0f410cc830..e6c802e53514 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -2203,39 +2203,35 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) { ; ; X86-AVX1-LABEL: PR52719: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm2 -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero -; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1 +; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] +; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] ; X86-AVX1-NEXT: # xmm4 = mem[0,0] -; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm5 -; X86-AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3,4,5,6,7] +; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm5 ; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm6 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1,2,3],xmm5[4,5,6,7] -; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm7, %xmm3 -; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm7, %xmm7 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm7[0,1,2,3],xmm3[4,5,6,7] -; X86-AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpsubq %xmm5, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm4 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7] -; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] -; X86-AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0 -; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm6, %xmm1 +; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm6, %xmm2 +; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; X86-AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1 +; X86-AVX1-NEXT: vpsubq %xmm5, %xmm1, %xmm1 +; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm2 +; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: PR52719: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 -; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648] -; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2 -; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 +; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl diff --git 
a/llvm/test/DebugInfo/AMDGPU/variable-locations.ll b/llvm/test/DebugInfo/AMDGPU/variable-locations.ll index b2fabbd3c381..365dd9dfea78 100644 --- a/llvm/test/DebugInfo/AMDGPU/variable-locations.ll +++ b/llvm/test/DebugInfo/AMDGPU/variable-locations.ll @@ -13,7 +13,22 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}}"GlobA" +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_external +; CHECK-NEXT: DW_AT_decl_file +; CHECK-NEXT: DW_AT_decl_line +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) @GlobA = common addrspace(1) global i32 0, align 4, !dbg !0 + +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}}"GlobB" +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_external +; CHECK-NEXT: DW_AT_decl_file +; CHECK-NEXT: DW_AT_decl_line +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) @GlobB = common addrspace(1) global i32 0, align 4, !dbg !6 ; CHECK: {{.*}}DW_TAG_subprogram @@ -63,28 +78,12 @@ entry: !llvm.ident = !{!12} !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) - -; CHECK: {{.*}}DW_TAG_variable -; CHECK-NEXT: DW_AT_name {{.*}}"GlobA" -; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_external -; CHECK-NEXT: DW_AT_decl_file -; CHECK-NEXT: DW_AT_decl_line -; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) !1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) !3 = !DIFile(filename: "variable-locations.cl", directory: "/some/random/directory") !4 = !{} !5 = !{!0, !6} !6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) - -; CHECK: {{.*}}DW_TAG_variable -; CHECK-NEXT: DW_AT_name {{.*}}"GlobB" -; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_external -; CHECK-NEXT: DW_AT_decl_file -; CHECK-NEXT: DW_AT_decl_line -; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) !7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !9 = !{i32 2, i32 0} diff --git a/llvm/test/DebugInfo/BPF/extern-void.ll b/llvm/test/DebugInfo/BPF/extern-void.ll index 283dd5411007..3bf8e27623e7 100644 --- a/llvm/test/DebugInfo/BPF/extern-void.ll +++ b/llvm/test/DebugInfo/BPF/extern-void.ll @@ -36,8 +36,9 @@ entry: ; CHECK: .quad bla1 ; CHECK-NEXT: DW_TAG_variable ; -; CHECK: .quad bla2 +; CHECK: .quad bla2 ; CHECK-NEXT: DW_TAG_const_type +; CHECK-NEXT: DW_TAG_subprogram ; Function Attrs: nounwind readnone speculatable willreturn declare void @llvm.dbg.value(metadata, metadata, metadata) #1 diff --git a/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll b/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll index 6e36aded9e19..2b6369e00521 100644 --- a/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll +++ b/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll @@ -27,15 +27,13 @@ ; The DISubprogram should show up in compile unit a. 
; CHECK: DW_TAG_compile_unit ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("a.cpp") -; CHECK: DW_TAG_subprogram -; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("func") +; CHECK: DW_AT_name ("b.cpp") +; CHECK-NOT: DW_TAG_subprogram ; CHECK: DW_TAG_compile_unit ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("b.cpp") -; CHECK-NOT: DW_TAG_subprogram +; CHECK: DW_AT_name ("a.cpp") +; CHECK: DW_AT_name ("func") source_filename = "test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll" diff --git a/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll b/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll index 6b9759284b64..07e420ce47d5 100644 --- a/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll +++ b/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll @@ -18,10 +18,6 @@ ; CHECK-NEXT: DW_AT_reference DW_FORM_flag_present ; CHECK: DW_TAG_subroutine_type DW_CHILDREN_yes ; CHECK-NEXT: DW_AT_rvalue_reference DW_FORM_flag_present - -; CHECK: DW_TAG_subprogram -; CHECK-NOT: DW_TAG_subprogram -; CHECK: DW_AT_name {{.*}}"g" ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll b/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll index 9ea8c724bfc0..7b749f8c7f4d 100644 --- a/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll +++ b/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll @@ -11,9 +11,9 @@ ; We should have all three linkage names in the .debug_info and .debug_names ; ALL: .debug_info contents: +; ALL: DW_AT_linkage_name ("_ZN1n1vE") ; ALL: DW_AT_linkage_name ("_Z1fi") ; ALL: DW_AT_linkage_name ("_Z1gi") -; ALL: DW_AT_linkage_name ("_ZN1n1vE") ; ALL: .debug_names contents: ; ALL: String: {{.*}} "_Z1fi" ; ALL: String: {{.*}} "_Z1gi" diff --git a/llvm/test/DebugInfo/Generic/enum-types.ll b/llvm/test/DebugInfo/Generic/enum-types.ll index d40866c091a3..786ee28f54ac 100644 --- a/llvm/test/DebugInfo/Generic/enum-types.ll +++ b/llvm/test/DebugInfo/Generic/enum-types.ll @@ -6,12 +6,12 @@ ; rdar://17628609 ; CHECK: DW_TAG_compile_unit +; CHECK: 0x[[ENUM:.*]]: DW_TAG_enumeration_type +; CHECK-NEXT: DW_AT_name {{.*}}"EA" ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_MIPS_linkage_name {{.*}}"_Z4topA2EA" ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM:.*]]} -; CHECK: 0x[[ENUM]]: DW_TAG_enumeration_type -; CHECK-NEXT: DW_AT_name {{.*}}"EA" +; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM]]} ; CHECK: DW_TAG_compile_unit ; CHECK: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll b/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll deleted file mode 100644 index 471526b1395c..000000000000 --- a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; namespace ns { -; inline __attribute__((always_inline)) -; void foo() { int a = 4; } -; } -; -; void goo() { -; using ns::foo; -; foo(); -; } - -; Ensure that imported declarations reference the correct subprograms even if -; those subprograms are inlined. 
- -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_namespace -; CHECK: DW_AT_name ("ns") -; CHECK: [[FOO:0x.*]]: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("goo") -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin ([[FOO]] -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_imported_declaration -; CHECK: DW_AT_import ([[FOO]]) -; CHECK: NULL -; CHECK: NULL - -; Function Attrs: mustprogress noinline optnone uwtable -define dso_local void @_Z3goov() !dbg !4 { -entry: - %a.i = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %a.i, metadata !16, metadata !DIExpression()), !dbg !18 - store i32 4, i32* %a.i, align 4, !dbg !18 - ret void, !dbg !20 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!10, !11, !12, !13, !14} -!llvm.ident = !{!15} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "imported-inlined-declaration.cpp", directory: "") -!2 = !{!3} -!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !8, file: !1, line: 7) -!4 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !5, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) -!5 = !DISubroutineType(types: !6) -!6 = !{null} -!7 = !{} -!8 = distinct !DISubprogram(name: "foo", linkageName: "_ZN2ns3fooEv", scope: !9, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) -!9 = !DINamespace(name: "ns", scope: null) -!10 = !{i32 7, !"Dwarf Version", i32 4} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i32 1, !"wchar_size", i32 4} -!13 = !{i32 7, !"uwtable", i32 1} -!14 = !{i32 7, !"frame-pointer", i32 2} -!15 = !{!"clang version 14.0.0"} -!16 = !DILocalVariable(name: "a", scope: !8, file: !1, line: 3, type: !17) -!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!18 = !DILocation(line: 3, column: 18, scope: !8, inlinedAt: !19) -!19 = distinct !DILocation(line: 8, column: 2, scope: !4) -!20 = !DILocation(line: 9, column: 1, scope: !4) diff --git a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll index a26b687b1f67..a3aaaff66f85 100644 --- a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll +++ b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll @@ -13,17 +13,21 @@ ; Ensure that top level imported declarations don't produce an extra degenerate ; concrete subprogram definition. 
+; FIXME: imported entities should only be emitted to the abstract origin if one is present + ; CHECK: DW_TAG_compile_unit ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f1") ; CHECK: DW_TAG_imported_declaration ; CHECK: NULL +; CHECK: DW_TAG_namespace +; CHECK: DW_TAG_subprogram +; CHECK: NULL ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f2") ; CHECK: DW_TAG_inlined_subroutine -; CHECK: NULL -; CHECK: DW_TAG_namespace -; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_imported_declaration +; CHECK: NULL ; CHECK: NULL ; CHECK: NULL diff --git a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll index 2d708b5cb21a..df78a937ed12 100644 --- a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll +++ b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll @@ -34,9 +34,6 @@ ; b.m_fn2(); ; } -; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name ("B") - ; CHECK: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_name ("C") ; CHECK: [[M_FN3_DECL:.*]]: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/Generic/inlined-local-type.ll b/llvm/test/DebugInfo/Generic/inlined-local-type.ll deleted file mode 100644 index b3f3f80c07d2..000000000000 --- a/llvm/test/DebugInfo/Generic/inlined-local-type.ll +++ /dev/null @@ -1,125 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { struct A {int i;}; struct A a; return a.i++; } -; -; __attribute__((always_inline)) -; int not_removed() { struct B {int i;}; struct B b; return b.i++; } -; -; int foo() { return removed() + not_removed(); }} - -; Ensure that function-local types have the correct subprogram parent even if -; those subprograms are inlined. 
- -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin ({{0x.*}} "not_removed") -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: [[A:0x.*]]: DW_TAG_structure_type -; CHECK: DW_AT_name ("A") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type ([[A]] "A") -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: [[B:0x.*]]: DW_TAG_structure_type -; CHECK: DW_AT_name ("B") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type ([[B]] "B") -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -%struct.B = type { i32 } -%struct.A = type { i32 } - -define dso_local i32 @not_removed() !dbg !12 { - %1 = alloca %struct.B, align 4 - call void @llvm.dbg.declare(metadata %struct.B* %1, metadata !18, metadata !DIExpression()), !dbg !22 - %2 = getelementptr inbounds %struct.B, %struct.B* %1, i32 0, i32 0, !dbg !23 - %3 = load i32, i32* %2, align 4, !dbg !24 - %4 = add nsw i32 %3, 1, !dbg !24 - store i32 %4, i32* %2, align 4, !dbg !24 - ret i32 %3, !dbg !25 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -define dso_local i32 @foo() !dbg !26 { - %1 = alloca %struct.A, align 4 - %2 = alloca %struct.B, align 4 - call void @llvm.dbg.declare(metadata %struct.A* %1, metadata !27, metadata !DIExpression()), !dbg !32 - %3 = getelementptr inbounds %struct.A, %struct.A* %1, i32 0, i32 0, !dbg !34 - %4 = load i32, i32* %3, align 4, !dbg !35 - %5 = add nsw i32 %4, 1, !dbg !35 - store i32 %5, i32* %3, align 4, !dbg !35 - call void @llvm.dbg.declare(metadata %struct.B* %2, metadata !18, metadata !DIExpression()), !dbg !36 - %6 = getelementptr inbounds %struct.B, %struct.B* %2, i32 0, i32 0, !dbg !38 - %7 = load i32, i32* %6, align 4, !dbg !39 - %8 = add nsw i32 %7, 1, !dbg !39 - store i32 %8, i32* %6, align 4, !dbg !39 - %9 = add nsw i32 %4, %7, !dbg !40 - ret i32 %9, !dbg !41 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "inlined-local-type.cpp", directory: "") -!2 = !{i32 7, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 1, !"branch-target-enforcement", i32 0} -!6 = !{i32 1, !"sign-return-address", i32 0} -!7 = !{i32 1, !"sign-return-address-all", i32 0} -!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -!9 = !{i32 7, !"uwtable", i32 1} -!10 = !{i32 7, !"frame-pointer", i32 1} -!11 = !{!"clang version 14.0.0"} -!12 = distinct !DISubprogram(name: "not_removed", scope: !13, file: !13, line: 5, type: !14, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!13 = !DIFile(filename: "inlined-local-type.cpp", directory: "") -!14 = !DISubroutineType(types: !15) -!15 = !{!16} -!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!17 = !{} -!18 = !DILocalVariable(name: "b", scope: !12, file: !13, line: 5, type: !19) -!19 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "B", scope: !12, file: 
!13, line: 5, size: 32, elements: !20) -!20 = !{!21} -!21 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !19, file: !13, line: 5, baseType: !16, size: 32) -!22 = !DILocation(line: 5, column: 49, scope: !12) -!23 = !DILocation(line: 5, column: 61, scope: !12) -!24 = !DILocation(line: 5, column: 62, scope: !12) -!25 = !DILocation(line: 5, column: 52, scope: !12) -!26 = distinct !DISubprogram(name: "foo", scope: !13, file: !13, line: 7, type: !14, scopeLine: 7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!27 = !DILocalVariable(name: "a", scope: !28, file: !13, line: 2, type: !29) -!28 = distinct !DISubprogram(name: "removed", scope: !13, file: !13, line: 2, type: !14, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!29 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A", scope: !28, file: !13, line: 2, size: 32, elements: !30) -!30 = !{!31} -!31 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !29, file: !13, line: 2, baseType: !16, size: 32) -!32 = !DILocation(line: 2, column: 45, scope: !28, inlinedAt: !33) -!33 = distinct !DILocation(line: 7, column: 20, scope: !26) -!34 = !DILocation(line: 2, column: 57, scope: !28, inlinedAt: !33) -!35 = !DILocation(line: 2, column: 58, scope: !28, inlinedAt: !33) -!36 = !DILocation(line: 5, column: 49, scope: !12, inlinedAt: !37) -!37 = distinct !DILocation(line: 7, column: 32, scope: !26) -!38 = !DILocation(line: 5, column: 61, scope: !12, inlinedAt: !37) -!39 = !DILocation(line: 5, column: 62, scope: !12, inlinedAt: !37) -!40 = !DILocation(line: 7, column: 30, scope: !26) -!41 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/inlined-static-var.ll b/llvm/test/DebugInfo/Generic/inlined-static-var.ll deleted file mode 100644 index 60c986fc2b64..000000000000 --- a/llvm/test/DebugInfo/Generic/inlined-static-var.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { static int A; return A++; } -; -; __attribute__((always_inline)) -; int not_removed() { static int B; return B++; } -; -; int foo() { return removed() + not_removed(); } - -; Ensure that global variables belong to the correct subprograms even if those -; subprograms are inlined. 
- -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("A") -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("B") -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_inlined_subroutine -; CHECK: NULL -; CHECK: NULL - -@_ZZ11not_removedvE1A = internal global i32 0, align 4, !dbg !0 -@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !10 - -define dso_local i32 @_Z11not_removedv() !dbg !2 { - %1 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 - %2 = add nsw i32 %1, 1, !dbg !24 - store i32 %2, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 - ret i32 %1, !dbg !25 -} - -define dso_local i32 @_Z3foov() !dbg !26 { - %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !27 - %2 = add nsw i32 %1, 1, !dbg !27 - store i32 %2, i32* @_ZZ7removedvE1A, align 4, !dbg !27 - %3 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 - %4 = add nsw i32 %3, 1, !dbg !29 - store i32 %4, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 - %5 = add nsw i32 %1, %3, !dbg !31 - ret i32 %5, !dbg !32 -} - -!llvm.dbg.cu = !{!7} -!llvm.module.flags = !{!14, !15, !16, !17, !18, !19, !20, !21, !22} -!llvm.ident = !{!23} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 5, type: !6, isLocal: true, isDefinition: true) -!2 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 5, type: !4, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!3 = !DIFile(filename: "example.cpp", directory: "") -!4 = !DISubroutineType(types: !5) -!5 = !{!6} -!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !8, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) -!8 = !DIFile(filename: "example.cpp", directory: "") -!9 = !{!0, !10} -!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) -!11 = distinct !DIGlobalVariable(name: "A", scope: !12, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) -!12 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 2, type: !4, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!13 = !{} -!14 = !{i32 7, !"Dwarf Version", i32 4} -!15 = !{i32 2, !"Debug Info Version", i32 3} -!16 = !{i32 1, !"wchar_size", i32 4} -!17 = !{i32 1, !"branch-target-enforcement", i32 0} -!18 = !{i32 1, !"sign-return-address", i32 0} -!19 = !{i32 1, !"sign-return-address-all", i32 0} -!20 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -!21 = !{i32 7, !"uwtable", i32 1} -!22 = !{i32 7, !"frame-pointer", i32 1} -!23 = !{!"clang version 14.0.0"} -!24 = !DILocation(line: 5, column: 43, scope: !2) -!25 = !DILocation(line: 5, column: 35, scope: !2) -!26 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 7, type: !4, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!27 = !DILocation(line: 2, 
column: 39, scope: !12, inlinedAt: !28) -!28 = distinct !DILocation(line: 7, column: 20, scope: !26) -!29 = !DILocation(line: 5, column: 43, scope: !2, inlinedAt: !30) -!30 = distinct !DILocation(line: 7, column: 32, scope: !26) -!31 = !DILocation(line: 7, column: 30, scope: !26) -!32 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_static.ll b/llvm/test/DebugInfo/Generic/lexical_block_static.ll deleted file mode 100644 index 56327b27cdb1..000000000000 --- a/llvm/test/DebugInfo/Generic/lexical_block_static.ll +++ /dev/null @@ -1,141 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { -; { -; static int A; -; return A++; -; } -; } -; -; __attribute__((always_inline)) -; int not_removed() { -; { -; static int B; -; return B++; -; } -; } -; -; int foo() { -; { -; static int C; -; return ++C + removed() + not_removed(); -; } -; } - -; CHECK: DW_TAG_compile_unit - -; Out-of-line definition of `not_removed()`. -; The empty lexical block is created to match abstract origin. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: NULL - -; Abstract definition of `removed()` -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_AT_inline (DW_INL_inlined) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("A") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL -; CHECK: DW_TAG_base_type - -; Abstract definition of `not_removed()` -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_AT_inline (DW_INL_inlined) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("B") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL - -; Definition of foo(). 
-; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_lexical_block -; CHECK: NULL -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_lexical_block -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("C") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -@_ZZ11not_removedvE1B = internal global i32 0, align 4, !dbg !0 -@_ZZ3foovE1C = internal global i32 0, align 4, !dbg !10 -@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !15 - -define dso_local i32 @_Z11not_removedv() !dbg !4 { -entry: - %0 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 - %inc = add nsw i32 %0, 1, !dbg !25 - store i32 %inc, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 - ret i32 %0, !dbg !26 -} - -define dso_local i32 @_Z3foov() !dbg !13 { -entry: - %0 = load i32, i32* @_ZZ3foovE1C, align 4, !dbg !27 - %inc = add nsw i32 %0, 1, !dbg !27 - store i32 %inc, i32* @_ZZ3foovE1C, align 4, !dbg !27 - %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !28 - %inc.i3 = add nsw i32 %1, 1, !dbg !28 - store i32 %inc.i3, i32* @_ZZ7removedvE1A, align 4, !dbg !28 - %add = add nsw i32 %inc, %1, !dbg !30 - %2 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 - %inc.i = add nsw i32 %2, 1, !dbg !31 - store i32 %inc.i, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 - %add2 = add nsw i32 %add, %2, !dbg !33 - ret i32 %add2, !dbg !34 -} - -!llvm.dbg.cu = !{!8} -!llvm.module.flags = !{!19, !20, !21, !22, !23} -!llvm.ident = !{!24} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 13, type: !7, isLocal: true, isDefinition: true) -!2 = distinct !DILexicalBlock(scope: !4, file: !3, line: 12, column: 3) -!3 = !DIFile(filename: "test_static.cpp", directory: "/") -!4 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!5 = !DISubroutineType(types: !6) -!6 = !{!7} -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e1d09ac2d118825452bfc26e44565f7f4122fd6d)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) -!9 = !{!0, !10, !15} -!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) -!11 = distinct !DIGlobalVariable(name: "C", scope: !12, file: !3, line: 20, type: !7, isLocal: true, isDefinition: true) -!12 = distinct !DILexicalBlock(scope: !13, file: !3, line: 19, column: 3) -!13 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 18, type: !5, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!14 = !{} -!15 = !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) -!16 = distinct !DIGlobalVariable(name: "A", scope: !17, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true) -!17 = distinct !DILexicalBlock(scope: !18, file: !3, line: 4, column: 3) -!18 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!19 = !{i32 7, !"Dwarf Version", i32 4} 
-!20 = !{i32 2, !"Debug Info Version", i32 3} -!21 = !{i32 1, !"wchar_size", i32 4} -!22 = !{i32 7, !"uwtable", i32 1} -!23 = !{i32 7, !"frame-pointer", i32 2} -!24 = !{!"clang version 14.0.0 (git@github.com:llvm/llvm-project.git)"} -!25 = !DILocation(line: 14, column: 13, scope: !2) -!26 = !DILocation(line: 14, column: 5, scope: !2) -!27 = !DILocation(line: 21, column: 12, scope: !12) -!28 = !DILocation(line: 6, column: 13, scope: !17, inlinedAt: !29) -!29 = distinct !DILocation(line: 21, column: 18, scope: !12) -!30 = !DILocation(line: 21, column: 16, scope: !12) -!31 = !DILocation(line: 14, column: 13, scope: !2, inlinedAt: !32) -!32 = distinct !DILocation(line: 21, column: 30, scope: !12) -!33 = !DILocation(line: 21, column: 28, scope: !12) -!34 = !DILocation(line: 21, column: 5, scope: !12) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_types.ll b/llvm/test/DebugInfo/Generic/lexical_block_types.ll deleted file mode 100644 index 1d4e9e7eb3e7..000000000000 --- a/llvm/test/DebugInfo/Generic/lexical_block_types.ll +++ /dev/null @@ -1,421 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; void removed() { -; struct A1 { int i; }; -; typedef int Int1; -; { -; struct I1 { Int1 j; }; -; struct C1 { typedef char Char1; Char1 c; }; -; A1 a1; a1.i++; -; { -; I1 i1; i1.j++; -; C1 c1; c1.c++; -; } -; } -; } -; -; __attribute__((always_inline)) -; void not_removed() { -; struct A2 { int i; }; -; typedef int Int2; -; { -; struct I2 { Int2 j; }; -; struct C2 { typedef char Char2; Char2 c; }; -; A2 a2; a2.i++; -; { -; I2 i2; i2.j++; -; C2 c2; c2.c++; -; } -; } -; } -; -; void foo() { -; struct A3 { int i; }; -; typedef int Int3; -; { -; struct I3 { Int3 j; }; -; { -; struct C3 { typedef char Char3; Char3 c; }; -; A3 a3; a3.i++; -; { -; I3 i3; i3.j++; -; C3 c3; c3.c++; -; } -; } -; } -; removed(); -; not_removed(); -; } -; -; CHECK: DW_TAG_compile_unit - -; Out-of-line definition of `not_removed()` shouldn't contain any debug info for types. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "a2" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "i2" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "c2" -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; Abstract definition of `removed()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_AT_inline (DW_INL_inlined) - -; I1 and C1 defined in the first lexical block, typedef Char1 is a child of C1. -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a1") -; CHECK: DW_AT_type {{.*}} "A1" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type {{.*}} "I1" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type {{.*}} "C1" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I1") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int1" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C1") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C1::Char1" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char1") -; CHECK: NULL -; CHECK: NULL - -; A1 and typedef Int1 defined in subprogram scope. 
-; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A1") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int1") -; CHECK: NULL - -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_base_type - -; Abstract definition of `not_removed()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_AT_inline (DW_INL_inlined) - -; I2 and C2 defined in the first lexical block, typedef Char2 is a child of C2. -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a2") -; CHECK: DW_AT_type {{.*}} "A2" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("i2") -; CHECK: DW_AT_type {{.*}} "I2" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("c2") -; CHECK: DW_AT_type {{.*}} "C2" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I2") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int2" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C2") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C2::Char2" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char2") -; CHECK: NULL -; CHECK: NULL - -; A2 and typedef Int2 defined in subprogram scope. -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A2") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int2") -; CHECK: NULL - -; Definition of `foo()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") - -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a3") -; CHECK: DW_AT_type {{.*}} "A3" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("i3") -; CHECK: DW_AT_type {{.*}} "I3" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("c3") -; CHECK: DW_AT_type {{.*}} "C3" -; CHECK: NULL - -; C3 has the inner lexical block scope, typedef Char3 is a child of C3. -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C3") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C3::Char3" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char3") -; CHECK: NULL -; CHECK: NULL - -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin {{.*}} "_Z7removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; A3 and typedef Int3 defined in the subprogram scope. -; FIXME: I3 has subprogram scope here, but should be in the outer lexical block -; (which is ommitted). It wasn't placed correctly, because it's the only non-scope -; entity in the block and it isn't listed in retainedTypes; we simply wasn't aware -; about it while deciding whether to create a lexical block or not. 
-; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A3") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I3") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int3" -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int3") -; CHECK: NULL -; CHECK: NULL - -%struct.A2 = type { i32 } -%struct.I2 = type { i32 } -%struct.C2 = type { i8 } -%struct.A1 = type { i32 } -%struct.I1 = type { i32 } -%struct.C1 = type { i8 } -%struct.A3 = type { i32 } -%struct.I3 = type { i32 } -%struct.C3 = type { i8 } - -define dso_local void @_Z11not_removedv() !dbg !8 { -entry: - %a2 = alloca %struct.A2, align 4 - %i2 = alloca %struct.I2, align 4 - %c2 = alloca %struct.C2, align 1 - call void @llvm.dbg.declare(metadata %struct.A2* %a2, metadata !12, metadata !DIExpression()), !dbg !18 - %i = getelementptr inbounds %struct.A2, %struct.A2* %a2, i32 0, i32 0, !dbg !19 - %0 = load i32, i32* %i, align 4, !dbg !20 - %inc = add nsw i32 %0, 1, !dbg !20 - store i32 %inc, i32* %i, align 4, !dbg !20 - call void @llvm.dbg.declare(metadata %struct.I2* %i2, metadata !21, metadata !DIExpression()), !dbg !27 - %j = getelementptr inbounds %struct.I2, %struct.I2* %i2, i32 0, i32 0, !dbg !28 - %1 = load i32, i32* %j, align 4, !dbg !29 - %inc1 = add nsw i32 %1, 1, !dbg !29 - store i32 %inc1, i32* %j, align 4, !dbg !29 - call void @llvm.dbg.declare(metadata %struct.C2* %c2, metadata !30, metadata !DIExpression()), !dbg !36 - %c = getelementptr inbounds %struct.C2, %struct.C2* %c2, i32 0, i32 0, !dbg !37 - %2 = load i8, i8* %c, align 1, !dbg !38 - %inc2 = add i8 %2, 1, !dbg !38 - store i8 %inc2, i8* %c, align 1, !dbg !38 - ret void, !dbg !39 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -define dso_local void @_Z3foov() !dbg !40 { -entry: - %a1.i = alloca %struct.A1, align 4 - %i1.i = alloca %struct.I1, align 4 - %c1.i = alloca %struct.C1, align 1 - %a2.i = alloca %struct.A2, align 4 - %i2.i = alloca %struct.I2, align 4 - %c2.i = alloca %struct.C2, align 1 - %a3 = alloca %struct.A3, align 4 - %i3 = alloca %struct.I3, align 4 - %c3 = alloca %struct.C3, align 1 - call void @llvm.dbg.declare(metadata %struct.A3* %a3, metadata !41, metadata !DIExpression()), !dbg !47 - %i = getelementptr inbounds %struct.A3, %struct.A3* %a3, i32 0, i32 0, !dbg !48 - %0 = load i32, i32* %i, align 4, !dbg !49 - %inc = add nsw i32 %0, 1, !dbg !49 - store i32 %inc, i32* %i, align 4, !dbg !49 - call void @llvm.dbg.declare(metadata %struct.I3* %i3, metadata !50, metadata !DIExpression()), !dbg !56 - %j = getelementptr inbounds %struct.I3, %struct.I3* %i3, i32 0, i32 0, !dbg !57 - %1 = load i32, i32* %j, align 4, !dbg !58 - %inc1 = add nsw i32 %1, 1, !dbg !58 - store i32 %inc1, i32* %j, align 4, !dbg !58 - call void @llvm.dbg.declare(metadata %struct.C3* %c3, metadata !59, metadata !DIExpression()), !dbg !64 - %c = getelementptr inbounds %struct.C3, %struct.C3* %c3, i32 0, i32 0, !dbg !65 - %2 = load i8, i8* %c, align 1, !dbg !66 - %inc2 = add i8 %2, 1, !dbg !66 - store i8 %inc2, i8* %c, align 1, !dbg !66 - call void @llvm.dbg.declare(metadata %struct.A1* %a1.i, metadata !67, metadata !DIExpression()), !dbg !73 - %i.i3 = getelementptr inbounds %struct.A1, %struct.A1* %a1.i, i32 0, i32 0, !dbg !75 - %3 = load i32, i32* %i.i3, align 4, !dbg !76 - %inc.i4 = add nsw i32 %3, 1, !dbg !76 - store i32 %inc.i4, i32* %i.i3, align 4, !dbg !76 - call void @llvm.dbg.declare(metadata %struct.I1* %i1.i, metadata !77, metadata !DIExpression()), !dbg !83 - %j.i5 = getelementptr inbounds 
%struct.I1, %struct.I1* %i1.i, i32 0, i32 0, !dbg !84 - %4 = load i32, i32* %j.i5, align 4, !dbg !85 - %inc1.i6 = add nsw i32 %4, 1, !dbg !85 - store i32 %inc1.i6, i32* %j.i5, align 4, !dbg !85 - call void @llvm.dbg.declare(metadata %struct.C1* %c1.i, metadata !86, metadata !DIExpression()), !dbg !91 - %c.i7 = getelementptr inbounds %struct.C1, %struct.C1* %c1.i, i32 0, i32 0, !dbg !92 - %5 = load i8, i8* %c.i7, align 1, !dbg !93 - %inc2.i8 = add i8 %5, 1, !dbg !93 - store i8 %inc2.i8, i8* %c.i7, align 1, !dbg !93 - call void @llvm.dbg.declare(metadata %struct.A2* %a2.i, metadata !12, metadata !DIExpression()), !dbg !94 - %i.i = getelementptr inbounds %struct.A2, %struct.A2* %a2.i, i32 0, i32 0, !dbg !96 - %6 = load i32, i32* %i.i, align 4, !dbg !97 - %inc.i = add nsw i32 %6, 1, !dbg !97 - store i32 %inc.i, i32* %i.i, align 4, !dbg !97 - call void @llvm.dbg.declare(metadata %struct.I2* %i2.i, metadata !21, metadata !DIExpression()), !dbg !98 - %j.i = getelementptr inbounds %struct.I2, %struct.I2* %i2.i, i32 0, i32 0, !dbg !99 - %7 = load i32, i32* %j.i, align 4, !dbg !100 - %inc1.i = add nsw i32 %7, 1, !dbg !100 - store i32 %inc1.i, i32* %j.i, align 4, !dbg !100 - call void @llvm.dbg.declare(metadata %struct.C2* %c2.i, metadata !30, metadata !DIExpression()), !dbg !101 - %c.i = getelementptr inbounds %struct.C2, %struct.C2* %c2.i, i32 0, i32 0, !dbg !102 - %8 = load i8, i8* %c.i, align 1, !dbg !103 - %inc2.i = add i8 %8, 1, !dbg !103 - store i8 %inc2.i, i8* %c.i, align 1, !dbg !103 - ret void, !dbg !104 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "test.cpp", directory: "/") -!2 = !{i32 7, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 7, !"uwtable", i32 1} -!6 = !{i32 7, !"frame-pointer", i32 2} -!7 = !{!"clang version 14.0.0"} -!8 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !1, file: !1, line: 17, type: !9, scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!9 = !DISubroutineType(types: !10) -!10 = !{null} -!11 = !{} -!12 = !DILocalVariable(name: "a2", scope: !13, file: !1, line: 23, type: !14) -!13 = distinct !DILexicalBlock(scope: !8, file: !1, line: 20, column: 3) -!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A2", scope: !8, file: !1, line: 18, size: 32, flags: DIFlagTypePassByValue, elements: !15) -!15 = !{!16} -!16 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !14, file: !1, line: 18, baseType: !17, size: 32) -!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!18 = !DILocation(line: 23, column: 8, scope: !13) -!19 = !DILocation(line: 23, column: 15, scope: !13) -!20 = !DILocation(line: 23, column: 16, scope: !13) -!21 = !DILocalVariable(name: "i2", scope: !22, file: !1, line: 25, type: !23) -!22 = distinct !DILexicalBlock(scope: !13, file: !1, line: 24, column: 5) -!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I2", scope: !13, file: !1, line: 21, size: 32, flags: DIFlagTypePassByValue, elements: !24) -!24 = !{!25} -!25 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !23, file: !1, line: 21, baseType: !26, size: 32) -!26 = !DIDerivedType(tag: DW_TAG_typedef, 
name: "Int2", scope: !8, file: !1, line: 19, baseType: !17) -!27 = !DILocation(line: 25, column: 10, scope: !22) -!28 = !DILocation(line: 25, column: 17, scope: !22) -!29 = !DILocation(line: 25, column: 18, scope: !22) -!30 = !DILocalVariable(name: "c2", scope: !22, file: !1, line: 26, type: !31) -!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C2", scope: !13, file: !1, line: 22, size: 8, flags: DIFlagTypePassByValue, elements: !32) -!32 = !{!33} -!33 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !31, file: !1, line: 22, baseType: !34, size: 8) -!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char2", scope: !31, file: !1, line: 22, baseType: !35) -!35 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!36 = !DILocation(line: 26, column: 10, scope: !22) -!37 = !DILocation(line: 26, column: 17, scope: !22) -!38 = !DILocation(line: 26, column: 18, scope: !22) -!39 = !DILocation(line: 29, column: 1, scope: !8) -!40 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 31, type: !9, scopeLine: 31, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!41 = !DILocalVariable(name: "a3", scope: !42, file: !1, line: 38, type: !44) -!42 = distinct !DILexicalBlock(scope: !43, file: !1, line: 36, column: 5) -!43 = distinct !DILexicalBlock(scope: !40, file: !1, line: 34, column: 3) -!44 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A3", scope: !40, file: !1, line: 32, size: 32, flags: DIFlagTypePassByValue, elements: !45) -!45 = !{!46} -!46 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !44, file: !1, line: 32, baseType: !17, size: 32) -!47 = !DILocation(line: 38, column: 10, scope: !42) -!48 = !DILocation(line: 38, column: 17, scope: !42) -!49 = !DILocation(line: 38, column: 18, scope: !42) -!50 = !DILocalVariable(name: "i3", scope: !51, file: !1, line: 40, type: !52) -!51 = distinct !DILexicalBlock(scope: !42, file: !1, line: 39, column: 7) -!52 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I3", scope: !43, file: !1, line: 35, size: 32, flags: DIFlagTypePassByValue, elements: !53) -!53 = !{!54} -!54 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !52, file: !1, line: 35, baseType: !55, size: 32) -!55 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int3", scope: !40, file: !1, line: 33, baseType: !17) -!56 = !DILocation(line: 40, column: 12, scope: !51) -!57 = !DILocation(line: 40, column: 19, scope: !51) -!58 = !DILocation(line: 40, column: 20, scope: !51) -!59 = !DILocalVariable(name: "c3", scope: !51, file: !1, line: 41, type: !60) -!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C3", scope: !42, file: !1, line: 37, size: 8, flags: DIFlagTypePassByValue, elements: !61) -!61 = !{!62} -!62 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !60, file: !1, line: 37, baseType: !63, size: 8) -!63 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char3", scope: !60, file: !1, line: 37, baseType: !35) -!64 = !DILocation(line: 41, column: 12, scope: !51) -!65 = !DILocation(line: 41, column: 19, scope: !51) -!66 = !DILocation(line: 41, column: 20, scope: !51) -!67 = !DILocalVariable(name: "a1", scope: !68, file: !1, line: 8, type: !70) -!68 = distinct !DILexicalBlock(scope: !69, file: !1, line: 5, column: 3) -!69 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !1, file: !1, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!70 = distinct 
!DICompositeType(tag: DW_TAG_structure_type, name: "A1", scope: !69, file: !1, line: 3, size: 32, flags: DIFlagTypePassByValue, elements: !71, identifier: "_ZTSZ7removedvE2A1") -!71 = !{!72} -!72 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !70, file: !1, line: 3, baseType: !17, size: 32) -!73 = !DILocation(line: 8, column: 8, scope: !68, inlinedAt: !74) -!74 = distinct !DILocation(line: 45, column: 3, scope: !40) -!75 = !DILocation(line: 8, column: 15, scope: !68, inlinedAt: !74) -!76 = !DILocation(line: 8, column: 16, scope: !68, inlinedAt: !74) -!77 = !DILocalVariable(name: "i1", scope: !78, file: !1, line: 10, type: !79) -!78 = distinct !DILexicalBlock(scope: !68, file: !1, line: 9, column: 5) -!79 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I1", scope: !68, file: !1, line: 6, size: 32, flags: DIFlagTypePassByValue, elements: !80, identifier: "_ZTSZ7removedvE2I1") -!80 = !{!81} -!81 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !79, file: !1, line: 6, baseType: !82, size: 32) -!82 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int1", scope: !69, file: !1, line: 4, baseType: !17) -!83 = !DILocation(line: 10, column: 10, scope: !78, inlinedAt: !74) -!84 = !DILocation(line: 10, column: 17, scope: !78, inlinedAt: !74) -!85 = !DILocation(line: 10, column: 18, scope: !78, inlinedAt: !74) -!86 = !DILocalVariable(name: "c1", scope: !78, file: !1, line: 11, type: !87) -!87 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C1", scope: !68, file: !1, line: 7, size: 8, flags: DIFlagTypePassByValue, elements: !88, identifier: "_ZTSZ7removedvE2C1") -!88 = !{!89} -!89 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !87, file: !1, line: 7, baseType: !90, size: 8) -!90 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char1", scope: !87, file: !1, line: 7, baseType: !35) -!91 = !DILocation(line: 11, column: 10, scope: !78, inlinedAt: !74) -!92 = !DILocation(line: 11, column: 17, scope: !78, inlinedAt: !74) -!93 = !DILocation(line: 11, column: 18, scope: !78, inlinedAt: !74) -!94 = !DILocation(line: 23, column: 8, scope: !13, inlinedAt: !95) -!95 = distinct !DILocation(line: 46, column: 3, scope: !40) -!96 = !DILocation(line: 23, column: 15, scope: !13, inlinedAt: !95) -!97 = !DILocation(line: 23, column: 16, scope: !13, inlinedAt: !95) -!98 = !DILocation(line: 25, column: 10, scope: !22, inlinedAt: !95) -!99 = !DILocation(line: 25, column: 17, scope: !22, inlinedAt: !95) -!100 = !DILocation(line: 25, column: 18, scope: !22, inlinedAt: !95) -!101 = !DILocation(line: 26, column: 10, scope: !22, inlinedAt: !95) -!102 = !DILocation(line: 26, column: 17, scope: !22, inlinedAt: !95) -!103 = !DILocation(line: 26, column: 18, scope: !22, inlinedAt: !95) -!104 = !DILocation(line: 47, column: 1, scope: !40) diff --git a/llvm/test/DebugInfo/Generic/namespace.ll b/llvm/test/DebugInfo/Generic/namespace.ll index 7b9bf9b531b8..2c635ee5731a 100644 --- a/llvm/test/DebugInfo/Generic/namespace.ll +++ b/llvm/test/DebugInfo/Generic/namespace.ll @@ -13,19 +13,6 @@ ; CHECK-NOT: DW_AT_decl_file ; CHECK-NOT: DW_AT_decl_line -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_MIPS_linkage_name -; CHECK: DW_AT_name ("f1") -; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram -; CHECK: DW_AT_MIPS_linkage_name -; CHECK: DW_AT_name ("f1") -; CHECK: DW_TAG_formal_parameter -; CHECK: NULL - -; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram -; CHECK: DW_AT_name ("func_fwd") -; CHECK-NOT: DW_AT_declaration - ; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable ; CHECK: DW_AT_name ("i") ; CHECK: 
[[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable @@ -37,6 +24,15 @@ ; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type ; CHECK: DW_AT_name ("bar") +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_MIPS_linkage_name +; CHECK: DW_AT_name ("f1") +; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram +; CHECK: DW_AT_MIPS_linkage_name +; CHECK: DW_AT_name ("f1") +; CHECK: DW_TAG_formal_parameter +; CHECK: NULL + ; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef ; CHECK: DW_AT_name ("baz") @@ -47,6 +43,10 @@ ; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram ; CHECK: DW_AT_name ("func_decl") ; CHECK: DW_AT_declaration + +; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +; CHECK: DW_AT_name ("func_fwd") +; CHECK-NOT: DW_AT_declaration ; CHECK: NULL ; CHECK: DW_TAG_imported_module @@ -56,17 +56,18 @@ ; CHECK: DW_TAG_imported_declaration ; CHECK: NULL +; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_imported_module +; CHECK: DW_AT_decl_file ([[F2:.*]]) +; CHECK: DW_AT_decl_line (18) +; CHECK: DW_AT_import ([[NS1]]) +; CHECK: DW_TAG_imported_declaration + ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_MIPS_linkage_name ; CHECK: DW_AT_name ("func") ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_decl_file ([[F2]]) -; CHECK: DW_AT_decl_line (23) -; CHECK: DW_AT_import {{.*}} -; CHECK: NULL ; CHECK: DW_TAG_imported_module ; CHECK: DW_AT_decl_file ([[F2:.*]]) ; CHECK: DW_AT_decl_line (26) @@ -117,16 +118,16 @@ ; CHECK: DW_AT_decl_file ([[F2]]) ; CHECK: DW_AT_decl_line (37) ; CHECK: DW_AT_import ([[FUNC_FWD]]) +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_imported_module +; CHECK: DW_AT_decl_file ([[F2]]) +; CHECK: DW_AT_decl_line (23) +; CHECK: DW_AT_import {{.*}} +; CHECK: NULL ; CHECK: NULL ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_decl_file ([[F2:.*]]) -; CHECK: DW_AT_decl_line (18) -; CHECK: DW_AT_import ([[NS1]]) -; CHECK: DW_TAG_imported_declaration -; CHECK: DW_TAG_base_type ; CHECK: NULL ; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths diff --git a/llvm/test/DebugInfo/Generic/varargs.ll b/llvm/test/DebugInfo/Generic/varargs.ll index 99acaff77671..81b5091afd42 100644 --- a/llvm/test/DebugInfo/Generic/varargs.ll +++ b/llvm/test/DebugInfo/Generic/varargs.ll @@ -12,23 +12,23 @@ ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("b") +; CHECK: DW_AT_name ("a") ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_variable -; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_variable +; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_unspecified_parameters ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("a") +; CHECK: DW_AT_name ("b") ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_formal_parameter +; CHECK: DW_TAG_variable +; CHECK-NOT: DW_TAG +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_unspecified_parameters ; diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir index 261da42a88d0..98ec9dc8fa41 100644 --- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir +++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir @@ -5,7 +5,7 @@ # encountering an IMPLICIT_DEF in its own lexical scope. 
# CHECK: .debug_info contents: -# CHECK: DW_TAG_formal_parameter [13] +# CHECK: DW_TAG_formal_parameter [14] # CHECK-NEXT: DW_AT_const_value [DW_FORM_udata] (0) # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "name" --- | diff --git a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll index d2b7a31ec9f6..73ec34787ad6 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll @@ -105,68 +105,68 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 2 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 64 // DW_AT_frame_base -; CHECK-NEXT:.b8 10 // DW_FORM_block1 -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 52 // DW_TAG_variable +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 51 // DW_AT_address_class +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 2 // DW_AT_location +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 3 // Abbreviation Code -; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 36 // DW_TAG_base_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_encoding ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 4 // Abbreviation Code -; CHECK-NEXT:.b8 52 // DW_TAG_variable -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 64 // DW_AT_frame_base +; CHECK-NEXT:.b8 10 // DW_FORM_block1 +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 51 // DW_AT_address_class -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 2 // DW_AT_location -; CHECK-NEXT:.b8 10 // DW_FORM_block1 +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; 
CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 5 // Abbreviation Code -; CHECK-NEXT:.b8 36 // DW_TAG_base_type +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 62 // DW_AT_encoding +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 11 // DW_AT_byte_size +; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 6 // Abbreviation Code @@ -258,7 +258,46 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x65:0x45 DW_TAG_subprogram +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x65:0x1a DW_TAG_variable +; CHECK-NEXT:.b8 71 // DW_AT_name +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 79 +; CHECK-NEXT:.b8 66 +; CHECK-NEXT:.b8 65 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 127 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 // DW_AT_address_class +; CHECK-NEXT:.b8 9 // DW_AT_location +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b64 GLOBAL +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x7f:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x86:0x1a DW_TAG_variable +; CHECK-NEXT:.b8 83 // DW_AT_name +; CHECK-NEXT:.b8 72 +; CHECK-NEXT:.b8 65 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 127 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 4 // DW_AT_decl_line +; CHECK-NEXT:.b8 8 // DW_AT_address_class +; CHECK-NEXT:.b8 9 // DW_AT_location +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b64 SHARED +; CHECK-NEXT:.b8 4 // Abbrev [4] 0xa0:0x45 DW_TAG_subprogram ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -276,71 +315,32 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x85:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc0:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 229 // DW_AT_type -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x8e:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc9:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 238 // DW_AT_type -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x97:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd2:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 238 // DW_AT_type -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xa0:0x9 DW_TAG_formal_parameter +; 
CHECK-NEXT:.b8 5 // Abbrev [5] 0xdb:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 196 // DW_AT_type +; CHECK-NEXT:.b32 127 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 4 // Abbrev [4] 0xaa:0x1a DW_TAG_variable -; CHECK-NEXT:.b8 71 // DW_AT_name -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 79 -; CHECK-NEXT:.b8 66 -; CHECK-NEXT:.b8 65 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 196 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_AT_address_class -; CHECK-NEXT:.b8 9 // DW_AT_location -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b64 GLOBAL -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc4:0x7 DW_TAG_base_type -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 4 // Abbrev [4] 0xcb:0x1a DW_TAG_variable -; CHECK-NEXT:.b8 83 // DW_AT_name -; CHECK-NEXT:.b8 72 -; CHECK-NEXT:.b8 65 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 196 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 4 // DW_AT_decl_line -; CHECK-NEXT:.b8 8 // DW_AT_address_class -; CHECK-NEXT:.b8 9 // DW_AT_location -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b64 SHARED -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe5:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe5:0x9 DW_TAG_base_type ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 7329e92337ec..2edd807a8d82 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -127,39 +127,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 2 // Abbreviation Code -; CHECK-NEXT:.b8 19 // DW_TAG_structure_type +; CHECK-NEXT:.b8 57 // DW_TAG_namespace ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 11 // DW_AT_byte_size -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 3 // Abbreviation Code +; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 24 // DW_AT_import +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 3 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbreviation Code +; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 24 // DW_AT_import ; CHECK-NEXT:.b8 19 // 
DW_FORM_ref4 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 4 // Abbreviation Code +; CHECK-NEXT:.b8 5 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name @@ -175,69 +171,43 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 5 // Abbreviation Code +; CHECK-NEXT:.b8 6 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 52 // DW_AT_artificial -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 6 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 7 // Abbreviation Code +; CHECK-NEXT:.b8 36 // DW_TAG_base_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 62 // DW_AT_encoding ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 50 // DW_AT_accessibility +; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 7 // Abbreviation Code -; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 8 // Abbreviation Code +; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 8 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 50 // DW_AT_accessibility -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 9 // Abbreviation Code +; CHECK-NEXT:.b8 38 // DW_TAG_const_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 9 // Abbreviation Code +; CHECK-NEXT:.b8 10 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 
8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -250,80 +220,40 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 50 // DW_AT_accessibility -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 10 // Abbreviation Code -; CHECK-NEXT:.b8 36 // DW_TAG_base_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 62 // DW_AT_encoding -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 11 // DW_AT_byte_size -; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 11 // Abbreviation Code -; CHECK-NEXT:.b8 13 // DW_TAG_member +; CHECK-NEXT:.b8 22 // DW_TAG_typedef ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 56 // DW_AT_data_member_location -; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 12 // Abbreviation Code -; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type +; CHECK-NEXT:.b8 19 // DW_TAG_structure_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 13 // Abbreviation Code -; CHECK-NEXT:.b8 38 // DW_TAG_const_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 14 // Abbreviation Code -; CHECK-NEXT:.b8 16 // DW_TAG_reference_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 15 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 71 // DW_AT_specification -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 32 // DW_AT_inline -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 16 // Abbreviation Code ; CHECK-NEXT:.b8 19 // DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 17 // Abbreviation Code +; CHECK-NEXT:.b8 14 // Abbreviation Code ; CHECK-NEXT:.b8 13 // DW_TAG_member ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name @@ -333,14 +263,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line 
-; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 56 // DW_AT_data_member_location ; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 18 // Abbreviation Code +; CHECK-NEXT:.b8 15 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -351,14 +281,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 135 // DW_AT_noreturn +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 19 // Abbreviation Code +; CHECK-NEXT:.b8 16 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -373,6 +303,21 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 17 // Abbreviation Code +; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 18 // Abbreviation Code +; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 19 // Abbreviation Code +; CHECK-NEXT:.b8 38 // DW_TAG_const_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 20 // Abbreviation Code ; CHECK-NEXT:.b8 22 // DW_TAG_typedef ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no @@ -387,45 +332,65 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 21 // Abbreviation Code +; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 22 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 32 // DW_AT_inline +; CHECK-NEXT:.b8 135 // DW_AT_noreturn +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 23 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 
59 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 22 // Abbreviation Code -; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 24 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 23 // Abbreviation Code +; CHECK-NEXT:.b8 25 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 64 // DW_AT_frame_base -; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 64 ; CHECK-NEXT:.b8 8 // DW_FORM_string @@ -435,91 +400,89 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 24 // Abbreviation Code -; CHECK-NEXT:.b8 52 // DW_TAG_variable -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 26 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 25 // Abbreviation Code -; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 88 // DW_AT_call_file +; CHECK-NEXT:.b8 27 // Abbreviation Code +; CHECK-NEXT:.b8 19 // DW_TAG_structure_type +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 26 // Abbreviation Code -; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 88 // DW_AT_call_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 28 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 27 // Abbreviation Code +; CHECK-NEXT:.b8 29 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 52 // DW_AT_artificial +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 28 // Abbreviation Code -; CHECK-NEXT:.b8 57 // DW_TAG_namespace +; CHECK-NEXT:.b8 30 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 29 // Abbreviation Code -; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 24 // DW_AT_import -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 30 // Abbreviation Code -; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 50 // DW_AT_accessibility ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 -; CHECK-NEXT:.b8 24 // DW_AT_import -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 31 // Abbreviation Code @@ -534,15 +497,20 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; 
CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 50 // DW_AT_accessibility +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 32 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -555,60 +523,60 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 50 // DW_AT_accessibility +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 33 // Abbreviation Code -; CHECK-NEXT:.b8 22 // DW_TAG_typedef +; CHECK-NEXT:.b8 16 // DW_TAG_reference_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 34 // Abbreviation Code -; CHECK-NEXT:.b8 19 // DW_TAG_structure_type +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 71 // DW_AT_specification +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 32 // DW_AT_inline +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 35 // Abbreviation Code ; CHECK-NEXT:.b8 19 // DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 36 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 13 // DW_TAG_member ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 // DW_FORM_data2 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 135 // DW_AT_noreturn -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 56 // DW_AT_data_member_location +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 37 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; 
CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -624,65 +592,45 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 38 // Abbreviation Code -; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 39 // Abbreviation Code -; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 40 // Abbreviation Code -; CHECK-NEXT:.b8 38 // DW_TAG_const_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 41 // Abbreviation Code -; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 42 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 135 // DW_AT_noreturn -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 32 // DW_AT_inline +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 43 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbreviation Code +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 44 // Abbreviation Code +; CHECK-NEXT:.b8 40 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 64 // DW_AT_frame_base +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 64 ; CHECK-NEXT:.b8 8 // DW_FORM_string @@ -691,11 +639,63 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 41 // Abbreviation Code +; 
CHECK-NEXT:.b8 52 // DW_TAG_variable +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 42 // Abbreviation Code +; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 88 // DW_AT_call_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 43 // Abbreviation Code +; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 88 // DW_AT_call_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 44 // Abbreviation Code +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 0 // EOM(3) @@ -744,7658 +744,7658 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x41:0x22a DW_TAG_structure_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x41:0x588 DW_TAG_namespace +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 77 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5f:0x4f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 
105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x46:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 202 // DW_AT_decl_line +; CHECK-NEXT:.b32 1481 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x4d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 203 // DW_AT_decl_line +; CHECK-NEXT:.b32 1525 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x54:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 204 // DW_AT_decl_line +; CHECK-NEXT:.b32 1563 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 205 // DW_AT_decl_line +; CHECK-NEXT:.b32 1594 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x62:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 206 // DW_AT_decl_line +; CHECK-NEXT:.b32 1623 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x69:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 207 // DW_AT_decl_line +; CHECK-NEXT:.b32 1654 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x70:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 208 // DW_AT_decl_line +; CHECK-NEXT:.b32 1683 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x77:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 209 // DW_AT_decl_line +; CHECK-NEXT:.b32 1720 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x7e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b32 1751 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x85:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 211 // DW_AT_decl_line +; CHECK-NEXT:.b32 1780 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x8c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 212 // DW_AT_decl_line +; CHECK-NEXT:.b32 1809 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x93:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 213 // DW_AT_decl_line +; CHECK-NEXT:.b32 1852 // DW_AT_import +; CHECK-NEXT:.b8 3 
// Abbrev [3] 0x9a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 214 // DW_AT_decl_line +; CHECK-NEXT:.b32 1879 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xa1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 215 // DW_AT_decl_line +; CHECK-NEXT:.b32 1908 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xa8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 216 // DW_AT_decl_line +; CHECK-NEXT:.b32 1935 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xaf:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 217 // DW_AT_decl_line +; CHECK-NEXT:.b32 1964 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xb6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 218 // DW_AT_decl_line +; CHECK-NEXT:.b32 1991 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xbd:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 219 // DW_AT_decl_line +; CHECK-NEXT:.b32 2020 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xc4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 220 // DW_AT_decl_line +; CHECK-NEXT:.b32 2051 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xcb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 221 // DW_AT_decl_line +; CHECK-NEXT:.b32 2080 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xd2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 222 // DW_AT_decl_line +; CHECK-NEXT:.b32 2115 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xd9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 223 // DW_AT_decl_line +; CHECK-NEXT:.b32 2146 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 224 // DW_AT_decl_line +; CHECK-NEXT:.b32 2185 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 225 // DW_AT_decl_line +; CHECK-NEXT:.b32 2220 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xee:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 226 // DW_AT_decl_line +; CHECK-NEXT:.b32 2255 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xf5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 227 // DW_AT_decl_line +; CHECK-NEXT:.b32 2290 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xfc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 228 // DW_AT_decl_line +; CHECK-NEXT:.b32 2339 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x103:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 229 // DW_AT_decl_line +; CHECK-NEXT:.b32 2382 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x10a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 230 // DW_AT_decl_line +; CHECK-NEXT:.b32 2419 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x111:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 231 // DW_AT_decl_line +; CHECK-NEXT:.b32 2450 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x118:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 232 
// DW_AT_decl_line +; CHECK-NEXT:.b32 2495 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x11f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 233 // DW_AT_decl_line +; CHECK-NEXT:.b32 2540 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x126:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 234 // DW_AT_decl_line +; CHECK-NEXT:.b32 2596 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x12d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 235 // DW_AT_decl_line +; CHECK-NEXT:.b32 2627 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x134:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 236 // DW_AT_decl_line +; CHECK-NEXT:.b32 2666 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x13b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 237 // DW_AT_decl_line +; CHECK-NEXT:.b32 2716 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x142:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 238 // DW_AT_decl_line +; CHECK-NEXT:.b32 2770 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x149:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 239 // DW_AT_decl_line +; CHECK-NEXT:.b32 2801 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x150:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 240 // DW_AT_decl_line +; CHECK-NEXT:.b32 2838 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x157:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 241 // DW_AT_decl_line +; CHECK-NEXT:.b32 2888 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x15e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 242 // DW_AT_decl_line +; CHECK-NEXT:.b32 2929 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x165:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 243 // DW_AT_decl_line +; CHECK-NEXT:.b32 2966 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x16c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 244 // DW_AT_decl_line +; CHECK-NEXT:.b32 2999 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x173:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 245 // DW_AT_decl_line +; CHECK-NEXT:.b32 3030 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x17a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 246 // DW_AT_decl_line +; CHECK-NEXT:.b32 3063 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x181:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 247 // DW_AT_decl_line +; CHECK-NEXT:.b32 3090 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x188:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 248 // DW_AT_decl_line +; CHECK-NEXT:.b32 3121 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x18f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 249 // DW_AT_decl_line +; CHECK-NEXT:.b32 3152 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x196:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 250 // DW_AT_decl_line +; CHECK-NEXT:.b32 3181 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 
0x19d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 251 // DW_AT_decl_line +; CHECK-NEXT:.b32 3210 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1a4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 252 // DW_AT_decl_line +; CHECK-NEXT:.b32 3241 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1ab:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 253 // DW_AT_decl_line +; CHECK-NEXT:.b32 3274 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1b2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 254 // DW_AT_decl_line +; CHECK-NEXT:.b32 3309 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1b9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 255 // DW_AT_decl_line +; CHECK-NEXT:.b32 3350 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 0 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3407 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1c8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 1 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3438 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1d0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 2 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3477 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1d8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3522 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 4 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3555 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3600 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1f0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 6 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3646 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1f8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3675 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x200:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 8 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3706 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x208:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 9 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3747 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x210:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 10 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3786 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x218:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3821 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x220:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3848 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x228:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 13 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3877 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x230:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 14 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3906 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x238:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 15 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3933 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x240:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 16 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3962 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x248:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 17 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3995 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x250:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 102 // DW_AT_decl_line +; CHECK-NEXT:.b32 4026 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x257:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b32 4046 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x25e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 140 // DW_AT_decl_line +; CHECK-NEXT:.b32 4066 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x265:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 159 // DW_AT_decl_line +; CHECK-NEXT:.b32 4086 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x26c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 180 // DW_AT_decl_line +; CHECK-NEXT:.b32 4112 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x273:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 199 // DW_AT_decl_line +; CHECK-NEXT:.b32 4132 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x27a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 218 // DW_AT_decl_line +; CHECK-NEXT:.b32 4151 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x281:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 237 // DW_AT_decl_line +; CHECK-NEXT:.b32 4171 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x288:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 0 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4190 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x290:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 19 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4210 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x298:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 38 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4231 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2a0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4256 // DW_AT_import +; 
CHECK-NEXT:.b8 4 // Abbrev [4] 0x2a8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xae:0x4f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 79 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xfd:0x4f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x14c:0x49 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 83 // DW_AT_decl_line -; CHECK-NEXT:.b32 635 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x18e:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 682 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x195:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1b5:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 692 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1bc:0x2c DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; 
CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1dc:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 692 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 697 // DW_AT_type +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4282 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2b0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 97 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4308 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2b8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 116 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4327 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 135 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4348 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2c8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 147 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4378 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2d0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 184 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4402 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2d8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 203 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4421 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2e0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 222 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4441 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2e8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 241 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4461 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2f0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 4 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 4480 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2f8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 118 // DW_AT_decl_line +; CHECK-NEXT:.b32 4500 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2ff:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 119 // DW_AT_decl_line +; CHECK-NEXT:.b32 4515 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x306:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b32 4563 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x30d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 122 // DW_AT_decl_line +; CHECK-NEXT:.b32 4576 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x314:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 123 // 
DW_AT_decl_line +; CHECK-NEXT:.b32 4596 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x31b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 129 // DW_AT_decl_line +; CHECK-NEXT:.b32 4625 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x322:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 130 // DW_AT_decl_line +; CHECK-NEXT:.b32 4645 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x329:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 131 // DW_AT_decl_line +; CHECK-NEXT:.b32 4666 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x330:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 132 // DW_AT_decl_line +; CHECK-NEXT:.b32 4687 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x337:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 133 // DW_AT_decl_line +; CHECK-NEXT:.b32 4815 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x33e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 134 // DW_AT_decl_line +; CHECK-NEXT:.b32 4843 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x345:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 135 // DW_AT_decl_line +; CHECK-NEXT:.b32 4868 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x34c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 136 // DW_AT_decl_line +; CHECK-NEXT:.b32 4886 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x353:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 137 // DW_AT_decl_line +; CHECK-NEXT:.b32 4903 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x35a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 138 // DW_AT_decl_line +; CHECK-NEXT:.b32 4931 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x361:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 139 // DW_AT_decl_line +; CHECK-NEXT:.b32 4952 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x368:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 140 // DW_AT_decl_line +; CHECK-NEXT:.b32 4978 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x36f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 142 // DW_AT_decl_line +; CHECK-NEXT:.b32 5001 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x376:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 143 // DW_AT_decl_line +; CHECK-NEXT:.b32 5028 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x37d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 144 // DW_AT_decl_line +; CHECK-NEXT:.b32 5079 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x384:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 146 // DW_AT_decl_line +; CHECK-NEXT:.b32 5112 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x38b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 152 // DW_AT_decl_line +; CHECK-NEXT:.b32 5145 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x392:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 153 // DW_AT_decl_line +; CHECK-NEXT:.b32 5160 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 
0x399:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 154 // DW_AT_decl_line +; CHECK-NEXT:.b32 5189 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3a0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b32 5223 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3a7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 156 // DW_AT_decl_line +; CHECK-NEXT:.b32 5255 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3ae:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 157 // DW_AT_decl_line +; CHECK-NEXT:.b32 5287 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3b5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 158 // DW_AT_decl_line +; CHECK-NEXT:.b32 5320 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3bc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 160 // DW_AT_decl_line +; CHECK-NEXT:.b32 5343 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3c3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b32 5388 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3ca:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 241 // DW_AT_decl_line +; CHECK-NEXT:.b32 5536 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3d1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 243 // DW_AT_decl_line +; CHECK-NEXT:.b32 5585 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3d8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 245 // DW_AT_decl_line +; CHECK-NEXT:.b32 5604 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3df:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 246 // DW_AT_decl_line +; CHECK-NEXT:.b32 5490 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3e6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 247 // DW_AT_decl_line +; CHECK-NEXT:.b32 5626 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3ed:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 249 // DW_AT_decl_line +; CHECK-NEXT:.b32 5653 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3f4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 250 // DW_AT_decl_line +; CHECK-NEXT:.b32 5768 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3fb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 251 // DW_AT_decl_line +; CHECK-NEXT:.b32 5675 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x402:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 252 // DW_AT_decl_line +; CHECK-NEXT:.b32 5708 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x409:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 253 // DW_AT_decl_line +; CHECK-NEXT:.b32 5795 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x410:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 149 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5838 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x418:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 150 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5870 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x420:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 151 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5904 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x428:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 152 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5936 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x430:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 153 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5970 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x438:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 154 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6010 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x440:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6042 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x448:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 156 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6076 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x450:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 157 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6108 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x458:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 158 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6140 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x460:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 159 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6186 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x468:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 160 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6216 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x470:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6248 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x478:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 162 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6280 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x480:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6310 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x488:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 164 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6342 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x490:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6372 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x498:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 166 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6406 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4a0:0x8 
DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 167 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6438 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4a8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 168 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6476 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4b0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 169 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6510 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4b8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 170 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6552 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 171 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6590 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4c8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 172 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6628 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4d0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 173 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6666 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4d8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 174 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6707 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4e0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 175 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6747 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4e8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 176 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6781 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4f0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 177 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6821 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4f8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 178 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6857 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x500:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 179 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6893 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x508:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 180 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6931 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x510:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 181 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6965 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x518:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 182 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6999 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x520:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 183 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; 
CHECK-NEXT:.b32 7031 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x528:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 184 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7063 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x530:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 185 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7093 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x538:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 186 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7127 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x540:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 187 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7163 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x548:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 188 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7202 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x550:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 189 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7245 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x558:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 190 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7294 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x560:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 191 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7330 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x568:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7379 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x570:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 193 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7428 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x578:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 194 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7460 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x580:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 195 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7494 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x588:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 196 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7538 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x590:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 197 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7580 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x598:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 198 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7610 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5a0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 199 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7642 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5a8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 200 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7674 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5b0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 201 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7704 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5b8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 202 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7736 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 203 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7772 // DW_AT_import ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1e8:0x43 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x5c9:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 61 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 44 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x21f:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 682 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x225:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 697 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x5de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x22b:0x3f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x5e4:0x11 DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 -; 
CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 38 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b32 702 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x263:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 682 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x26b:0x10 DW_TAG_base_type -; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x27b:0x2f DW_TAG_structure_type -; CHECK-NEXT:.b8 117 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_byte_size -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 190 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x285:0xc DW_TAG_member -; CHECK-NEXT:.b8 120 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x291:0xc DW_TAG_member -; CHECK-NEXT:.b8 121 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x29d:0xc DW_TAG_member -; CHECK-NEXT:.b8 122 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2aa:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 687 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x2af:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 65 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2b4:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 65 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x2b9:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 687 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2be:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 65 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x2c3:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 95 // DW_AT_specification -; CHECK-NEXT:.b8 1 // DW_AT_inline -; 
CHECK-NEXT:.b8 2 // Abbrev [2] 0x2c9:0x228 DW_TAG_structure_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 88 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2e7:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x5f5:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 46 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x60c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x612:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 89 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x336:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x61b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 48 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x634:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x63a:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 50 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x385:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x651:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x657:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; 
CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 52 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x670:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x676:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 91 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 56 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x3d4:0x47 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x68d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x693:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; 
CHECK-NEXT:.b8 54 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6ad:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6b2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x6b8:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 1265 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 58 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x414:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1441 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6d1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x41b:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x6d7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 60 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x43b:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1451 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6ee:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x442:0x2c DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x6f4:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; 
CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x462:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1451 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x468:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1456 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x70b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x46e:0x43 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x711:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 64 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x731:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x736:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x73c:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 66 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x751:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x757:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; 
CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 68 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x76e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x774:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 61 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 72 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x789:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x78f:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 70 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7ac:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 76 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7c1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7c7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; 
CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7e4:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 78 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x4a5:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1441 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x4ab:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1456 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7fd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x4b1:0x3f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x803:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 38 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line -; CHECK-NEXT:.b32 1461 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 80 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x4e9:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1441 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x81a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 
// DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 16 // Abbrev [16] 0x4f1:0x9d DW_TAG_structure_type -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_byte_size -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x4fb:0xd DW_TAG_member -; CHECK-NEXT:.b8 120 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x508:0xd DW_TAG_member -; CHECK-NEXT:.b8 121 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x515:0xd DW_TAG_member -; CHECK-NEXT:.b8 122 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x522:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x820:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x52d:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1422 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x533:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x538:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x53d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x543:0x17 DW_TAG_subprogram -; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 166 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 82 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x54e:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1422 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x554:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1427 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x838:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x83d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; 
CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 19 // Abbrev [19] 0x55a:0x33 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x843:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 1427 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 84 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x586:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1422 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x85c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x58e:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1265 // DW_AT_type -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x593:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 635 // DW_AT_type -; CHECK-NEXT:.b8 117 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x862:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 127 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5a1:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1446 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x5a6:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 713 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5ab:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 713 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x5b0:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 1446 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5b5:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 713 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x5ba:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 743 // DW_AT_specification -; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x5c0:0x233 DW_TAG_structure_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5df:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 86 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x879:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x87e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x883:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x889:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 88 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8a1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x8ac:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; 
CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x62f:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x8cf:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 92 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8e7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8ec:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x8f2:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 48 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 121 ; 
CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 68 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 94 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x67f:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x916:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x91c:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x923:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x93e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x943:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x949:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x94e:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 104 // DW_AT_name +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 98 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x968:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x96d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x973:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; 
CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 100 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x98c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x992:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 69 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 102 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x6cf:0x4a DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x9b1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x9b7:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 98 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_encoding +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x9bf:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 57 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 
0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 635 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 106 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x712:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2035 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x9e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x9e6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x719:0x28 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x9ec:0x38 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x73a:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2045 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa19:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa1e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x741:0x2d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 
117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa24:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 108 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x762:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2045 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x768:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2050 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa3d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x76e:0x44 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa43:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 112 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa5f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa64:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa6a:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 49 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 61 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 111 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7a6:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2035 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x7ac:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2050 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa96:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x7b2:0x40 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa9c:0x36 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 38 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 2055 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 114 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // 
DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7eb:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2035 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xac7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xacc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7f3:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 2040 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x7f8:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 1472 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7fd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1472 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x802:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 2040 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x807:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1472 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x80c:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 1503 // DW_AT_specification -; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 21 // Abbrev [21] 0x812:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xad2:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x826:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 120 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x82f:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 121 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x838:0xb DW_TAG_formal_parameter -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2125 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xaeb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x844:0x9 DW_TAG_base_type -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x84d:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 23 // Abbrev [23] 0x852:0xbf DW_TAG_subprogram -; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc -; CHECK-NEXT:.b64 
Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_frame_base -; CHECK-NEXT:.b8 156 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xaf1:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x87d:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x886:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 118 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb10:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb16:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x88f:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 120 // DW_AT_name +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2125 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x898:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 121 // DW_AT_name +; CHECK-NEXT:.b8 120 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb3d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb42:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb48:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; 
CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2125 // DW_AT_type -; CHECK-NEXT:.b8 24 // Abbrev [24] 0x8a1:0x9 DW_TAG_variable -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp0 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp1 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 6 // DW_AT_call_line -; CHECK-NEXT:.b8 11 // DW_AT_call_column -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c2:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 1466 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp1 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp2 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 6 // DW_AT_call_line -; CHECK-NEXT:.b8 24 // DW_AT_call_column -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8da:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 2060 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp2 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp3 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 6 // DW_AT_call_line -; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8f2:0x1e DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 2066 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp9 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp10 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 8 // DW_AT_call_line -; CHECK-NEXT:.b8 5 // DW_AT_call_column -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x90a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2095 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb5f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x911:0x588 DW_TAG_namespace -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xb65:0xc DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb71:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x916:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b32 3737 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x91d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b32 3781 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x924:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 204 
// DW_AT_decl_line -; CHECK-NEXT:.b32 3810 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x92b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b32 3841 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x932:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 206 // DW_AT_decl_line -; CHECK-NEXT:.b32 3870 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x939:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 3901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x940:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 208 // DW_AT_decl_line -; CHECK-NEXT:.b32 3930 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x947:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3967 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x94e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b32 3998 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x955:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 4027 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x95c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b32 4056 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x963:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 213 // DW_AT_decl_line -; CHECK-NEXT:.b32 4099 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x96a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 4126 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x971:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 215 // DW_AT_decl_line -; CHECK-NEXT:.b32 4155 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x978:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 216 // DW_AT_decl_line -; CHECK-NEXT:.b32 4182 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x97f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 4211 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x986:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 4238 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x98d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 219 // DW_AT_decl_line -; CHECK-NEXT:.b32 4267 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x994:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 220 // DW_AT_decl_line -; CHECK-NEXT:.b32 4298 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x99b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 221 // DW_AT_decl_line -; CHECK-NEXT:.b32 4327 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a2:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b32 4362 // DW_AT_import -; 
CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a9:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 223 // DW_AT_decl_line -; CHECK-NEXT:.b32 4393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b0:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b32 4432 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b7:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 225 // DW_AT_decl_line -; CHECK-NEXT:.b32 4467 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9be:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 226 // DW_AT_decl_line -; CHECK-NEXT:.b32 4502 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9c5:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b32 4537 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9cc:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 4586 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d3:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 4629 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9da:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 4666 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e1:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 231 // DW_AT_decl_line -; CHECK-NEXT:.b32 4697 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e8:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 4742 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9ef:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 4787 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9f6:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 234 // DW_AT_decl_line -; CHECK-NEXT:.b32 4843 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9fd:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b32 4874 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa04:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 236 // DW_AT_decl_line -; CHECK-NEXT:.b32 4913 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa0b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 4963 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa12:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 238 // DW_AT_decl_line -; CHECK-NEXT:.b32 5017 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa19:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 239 // DW_AT_decl_line -; CHECK-NEXT:.b32 5048 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa20:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b32 5085 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa27:0x7 
DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 5135 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa2e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 242 // DW_AT_decl_line -; CHECK-NEXT:.b32 5176 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa35:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 5213 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa3c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 244 // DW_AT_decl_line -; CHECK-NEXT:.b32 5246 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa43:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 5277 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa4a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 5310 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa51:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 5337 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa58:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 248 // DW_AT_decl_line -; CHECK-NEXT:.b32 5368 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa5f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 5399 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa66:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 5428 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa6d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 5457 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa74:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 5488 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa7b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 5521 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa82:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 254 // DW_AT_decl_line -; CHECK-NEXT:.b32 5556 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa89:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 255 // DW_AT_decl_line -; CHECK-NEXT:.b32 5592 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 0 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5649 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa98:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 1 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5680 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 2 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5719 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa8:0x8 
DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5764 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 4 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5797 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5842 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5888 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5917 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 8 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5948 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 9 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5989 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 10 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6028 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6063 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 12 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6090 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 13 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6119 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb00:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 14 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6148 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb08:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 15 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6175 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb10:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 16 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6204 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb18:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 17 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6237 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb20:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 6268 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb27:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6288 // DW_AT_import -; CHECK-NEXT:.b8 29 // 
Abbrev [29] 0xb2e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 6308 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb35:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 6328 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb3c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b32 6354 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb43:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b32 6374 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb4a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 6393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb51:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 6413 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb58:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 0 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6432 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb60:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 19 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6452 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb68:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 38 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6473 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb70:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6498 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb78:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 78 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6524 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb80:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 97 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6550 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb88:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6569 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6590 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb98:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 147 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6620 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6644 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6663 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb0:0x8 
DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6683 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6703 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbc0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 4 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 6722 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbc8:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 6742 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbcf:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b32 6757 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbd6:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6805 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbdd:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 122 // DW_AT_decl_line -; CHECK-NEXT:.b32 6818 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbe4:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 6838 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbeb:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 129 // DW_AT_decl_line -; CHECK-NEXT:.b32 6867 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf2:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb8b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb90:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb96:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 125 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xbb1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xbb7:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 
// DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 126 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xbd0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xbd6:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 128 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xbf1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xbf7:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 138 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc0c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc12:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 6887 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf9:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 131 // DW_AT_decl_line -; CHECK-NEXT:.b32 6908 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc00:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc2b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc31:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 6929 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc07:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 133 // DW_AT_decl_line -; CHECK-NEXT:.b32 7057 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc0e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc4a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc50:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 7085 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc15:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b32 7110 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc1c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc67:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc6d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 7128 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc23:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 137 // DW_AT_decl_line -; CHECK-NEXT:.b32 7145 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc2a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 7173 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc31:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b32 7194 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc38:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc84:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc8a:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; 
CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 7220 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc3f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xca3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xca9:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 7243 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc46:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xcc4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xcca:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 7270 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc4d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 144 // DW_AT_decl_line -; CHECK-NEXT:.b32 7321 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc54:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xce7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xced:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 145 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd06:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd0b:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3345 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0xd11:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd16:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 7354 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc5b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b32 7387 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc62:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 7402 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc69:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b32 7431 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc70:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 7449 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc77:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 7481 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc7e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 7513 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc85:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b32 7546 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc8c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b32 7569 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc93:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 7614 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc9a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca1:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 7811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca8:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 7830 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcaf:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 7716 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcb6:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 7852 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcbd:0x7 DW_TAG_imported_declaration -; 
CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 7879 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcc4:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 7994 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xccb:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 7901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd2:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 7934 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd9:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 8021 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 149 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8064 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 150 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8096 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 151 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8130 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8162 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd00:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8196 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd08:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8236 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd10:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8268 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd18:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8302 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd20:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8334 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd28:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8366 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd30:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8412 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd38:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8442 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd40:0x8 DW_TAG_imported_declaration -; 
CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8474 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd48:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 162 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8506 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd50:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8536 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd58:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8568 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd60:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8598 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd68:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 166 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8632 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd70:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8664 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd78:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 168 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8702 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd80:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8736 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd88:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 170 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8778 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 171 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8816 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd98:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 172 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8854 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 173 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8892 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 174 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8933 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8973 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 176 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9007 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 177 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; 
CHECK-NEXT:.b32 9047 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9083 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 179 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9119 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9157 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9191 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 182 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9225 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9257 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9289 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe00:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 185 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9319 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe08:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 186 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9353 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe10:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9389 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe18:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 188 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9428 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe20:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 189 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9471 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe28:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 190 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9520 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe30:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 191 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9556 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe38:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9605 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe40:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 193 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9654 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe48:0x8 DW_TAG_imported_declaration -; 
CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 194 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9686 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe50:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 195 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9720 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe58:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 196 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9764 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe60:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 197 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9806 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe68:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 198 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9836 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe70:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9868 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe78:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 200 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9900 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe80:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9930 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe88:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9962 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9998 // DW_AT_import +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd2d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xd33:0xa DW_TAG_base_type +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 8 // Abbrev [8] 0xd3d:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 3394 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0xd42:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 3399 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xd47:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 8 // DW_AT_encoding +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd4f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 147 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd68:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xe99:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd6e:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 44 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 149 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd8f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0xeb4:0x11 DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd95:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 151 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xdb7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xdbc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xdc2:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xdd8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xddd:0x5 DW_TAG_formal_parameter 
+; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xde3:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 157 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe05:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe0a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe10:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 159 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe2e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe33:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe38:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe3e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xec5:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe55:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe5b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; 
CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 46 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xedc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe74:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xee2:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe7a:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe98:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe9d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xea3:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 167 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xebf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xec4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xeca:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; 
CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 48 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 169 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xefb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xee7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf01:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xeed:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 50 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 171 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf18:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf02:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf1e:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf08:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 173 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf37:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf1f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf3d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf25:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 175 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // 
DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf54:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf3c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf5a:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf42:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 177 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf74:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf79:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf57:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf7f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf5d:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 179 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf98:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf74:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf9e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf7a:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 181 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfb5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf95:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfbb:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf9b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 183 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xfb4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfd8:0x2b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xfba:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 54 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xfc8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xfce:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 64 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 56 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xff8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xffd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xfdc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1003:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xfe2:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 58 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xff0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xff6:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 60 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1005:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x100a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1010:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 178 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1018:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x101e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x101e:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1024:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 63 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1031:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1037:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 68 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 72 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1035:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 
0x1045:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x103b:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x104b:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 100 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1050:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1058:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1056:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x105e:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 181 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x106c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1072:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 70 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 184 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x106d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1081:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1073:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1087:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 187 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1088:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1095:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x109a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x108e:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10a0:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 103 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10a5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10af:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ab:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10ba:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 106 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10ce:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10d4:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 109 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10e7:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 78 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 112 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10c4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ca:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10fc:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 115 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x110a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x110f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4373 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1115:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x111a:0x18 DW_TAG_subprogram +; CHECK-NEXT:.b8 112 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 153 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1127:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x112c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10e7:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1132:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 65 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x113f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1145:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file -; CHECK-NEXT:.b8 82 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ff:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1104:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1153:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x110a:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1159:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 84 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 156 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1123:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1167:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1129:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x116d:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 86 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 67 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1140:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1145:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x117a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1150:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 
109 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1180:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 88 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 76 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1168:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x118e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1173:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1194:0xd DW_TAG_typedef +; CHECK-NEXT:.b32 4513 // DW_AT_type +; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 101 // DW_AT_decl_line +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x11a1:0x2 DW_TAG_structure_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x11a3:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 4529 // DW_AT_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x118b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1190:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1196:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 // DW_AT_decl_line +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x11b1:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 16 // DW_AT_byte_size +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x11b5:0xf DW_TAG_member +; CHECK-NEXT:.b8 113 // DW_AT_name +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 107 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x11c4:0xe DW_TAG_member +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; 
CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 92 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11ae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11b9:0x2a DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 15 // Abbrev [15] 0x11d3:0xd DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_noreturn +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x11e0:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11dd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x11ee:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11e3:0x7 DW_TAG_base_type -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x11f4:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11ea:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1205:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1205:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4619 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1210:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1215:0x25 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x120b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 17 // Abbrev [17] 0x1210:0x1 DW_TAG_subroutine_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1211:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 104 // DW_AT_name -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 6 // DW_AT_decl_file +; CHECK-NEXT:.b8 26 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x121f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1225:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 98 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 22 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x122f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1234:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1234:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x123a:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x123a:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 27 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1253:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1249:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 
31 // Abbrev [31] 0x1259:0x25 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x124f:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 7 // DW_AT_decl_file +; CHECK-NEXT:.b8 20 // DW_AT_decl_line +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1260:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1265:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x126a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x126f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1274:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4772 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x127a:0x1 DW_TAG_pointer_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x127b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4736 // DW_AT_type +; CHECK-NEXT:.b8 19 // Abbrev [19] 0x1280:0x1 DW_TAG_const_type +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1281:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 4751 // DW_AT_type +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_decl_line +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x128f:0x15 DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1278:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x127e:0x8 DW_TAG_base_type -; CHECK-NEXT:.b8 98 // DW_AT_name -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 7 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x12a4:0x16 DW_TAG_typedef +; CHECK-NEXT:.b32 4794 // DW_AT_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_encoding -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1286:0x2d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 230 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x12ba:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4799 // DW_AT_type +; CHECK-NEXT:.b8 21 // Abbrev [21] 0x12bf:0x10 DW_TAG_subroutine_type +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x12cf:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 212 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4730 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12a8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ad:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12e0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12e5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12b3:0x38 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x12eb:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 21 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4500 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12f9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12fe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x1304:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 31 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_noreturn +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1310:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; 
CHECK-NEXT:.b8 23 // Abbrev [23] 0x1316:0x11 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 227 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1321:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1327:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 103 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 52 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 4926 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1338:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12eb:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x133e:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 3399 // DW_AT_type +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1343:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 8 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1304:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1352:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x130a:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1358:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; 
CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 23 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4515 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1326:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1367:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x136c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1331:0x32 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1372:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4730 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1358:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x135d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1383:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1363:0x36 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1389:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 95 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1399:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x139e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x13a4:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; 
CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 106 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13b7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5063 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13bc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13c1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x13c7:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5068 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13cc:0xb DW_TAG_base_type +; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x13d7:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 98 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13e8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5063 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13ed:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13f2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x13f8:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 114 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 253 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x138e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1393:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1399:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1404:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1409:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; 
CHECK-NEXT:.b8 6 // Abbrev [6] 0x140e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1413:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4772 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 24 // Abbrev [24] 0x1419:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 118 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1428:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 224 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4730 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x143a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x143f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13b8:0x25 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x1445:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 120 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13d7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1451:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13dd:0x32 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1457:0x10 DW_TAG_base_type +; CHECK-NEXT:.b8 117 // DW_AT_name +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; 
CHECK-NEXT:.b8 7 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1467:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 164 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1477:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x147c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1482:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4926 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1487:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 183 // DW_AT_decl_line +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1497:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x149c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14a1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x14a7:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 187 // DW_AT_decl_line +; CHECK-NEXT:.b32 4751 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14bd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14c2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x14c8:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 205 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1404:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1409:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // 
Abbrev [6] 0x14d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x140f:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x14df:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 109 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14f2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4926 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14f7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5378 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14fc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1502:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5383 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x1507:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 5068 // DW_AT_type +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x150c:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 102 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1426:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x151d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4926 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1522:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5068 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x142c:0xc DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x1528:0x78 DW_TAG_namespace +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1438:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1533:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 201 // DW_AT_decl_line +; CHECK-NEXT:.b32 5536 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x153a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 207 // DW_AT_decl_line +; CHECK-NEXT:.b32 5585 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1541:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 211 // 
DW_AT_decl_line +; CHECK-NEXT:.b32 5604 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1548:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 217 // DW_AT_decl_line +; CHECK-NEXT:.b32 5626 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x154f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 228 // DW_AT_decl_line +; CHECK-NEXT:.b32 5653 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1556:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 229 // DW_AT_decl_line +; CHECK-NEXT:.b32 5675 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x155d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 230 // DW_AT_decl_line +; CHECK-NEXT:.b32 5708 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1564:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 232 // DW_AT_decl_line +; CHECK-NEXT:.b32 5768 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x156b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 233 // DW_AT_decl_line +; CHECK-NEXT:.b32 5795 // DW_AT_import +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x1572:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 214 // DW_AT_decl_line +; CHECK-NEXT:.b32 5536 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1594:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1599:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x15a0:0xf DW_TAG_typedef +; CHECK-NEXT:.b32 5551 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x15af:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 16 // DW_AT_byte_size +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 117 // DW_AT_decl_line +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x15b3:0xf DW_TAG_member +; CHECK-NEXT:.b8 113 // DW_AT_name +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 119 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x15c2:0xe DW_TAG_member +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 120 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x15d1:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 45 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1452:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1457:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_noreturn +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x15de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x145d:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x15e4:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x15f4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x15fa:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 125 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 29 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 5536 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1478:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x160a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x160f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x147e:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1615:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 
97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 126 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 36 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1497:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1625:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x149d:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x162b:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 209 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x163c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1641:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1646:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x164c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 214 // DW_AT_decl_line +; CHECK-NEXT:.b32 5742 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x165e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1663:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1668:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x166e:0x1a DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 7 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1688:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 128 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14be:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 172 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14d3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1698:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x169d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14d9:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x16a3:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 175 // DW_AT_decl_line +; CHECK-NEXT:.b32 5823 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x16b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x16b9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14f8:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16bf:0xf DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1511:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 
0x1517:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x16ce:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x152e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x16e8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1534:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x16ee:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x154b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x170a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1551:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1710:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 57 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x156a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x172a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1570:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1730:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x158b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1591:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 95 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15ae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x174c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15b4:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1752:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 145 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 47 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 
// Abbrev [7] 0x15cd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2125 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x176f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1774:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15d8:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x177a:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 52 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15ef:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1794:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15f5:0xa DW_TAG_base_type -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x15ff:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5636 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1604:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 5641 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1609:0x8 DW_TAG_base_type -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 8 // DW_AT_encoding -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1611:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x179a:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 147 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 100 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x162a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // 
DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x17b6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1630:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x17bc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 149 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 150 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1651:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x17d6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1657:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x17dc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 151 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1679:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x167e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x17f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1684:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x17fc:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; 
CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 112 // DW_AT_name +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x181f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1824:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16a5:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x182a:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 219 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16c7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1842:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16d2:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1848:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 +; 
CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 32 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16fa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1862:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1700:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1868:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1717:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1882:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x171d:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1888:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 200 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1736:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x18a0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x173c:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x18a6:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 145 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x175a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x175f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x18c0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1765:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x18c6:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 14 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1781:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1786:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x18de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x178c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x18e4:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; 
CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17a9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1900:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17af:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1906:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 171 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 95 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17c4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1920:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17ca:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1926:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 173 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 80 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1941:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1946:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17e7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x194c:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17fe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1804:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 177 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1819:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1968:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x181f:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x196e:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 179 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 32 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1836:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1988:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x198d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1992:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x183c:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1998:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 
-; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 110 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1857:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19b3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x185d:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x19be:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1876:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x187c:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x188a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1890:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x189e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18a4:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // 
End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18b8:0x1a DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18c7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18d2:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18e6:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 63 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18f3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18f9:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1907:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x190d:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x191a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1920:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x192e:0x5 DW_TAG_formal_parameter -; 
CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1934:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x19e4:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1943:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1949:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 17 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1957:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19ff:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a04:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1962:0x1a DW_TAG_subprogram -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a0a:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 103 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1971:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1976:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x197c:0x1a DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x198b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1990:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a28:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a2d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1996:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a33:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19a9:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 104 // DW_AT_name +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 110 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a50:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a55:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19be:0x19 DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a5b:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 115 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6615 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19d7:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5621 // DW_AT_type -; 
CHECK-NEXT:.b8 32 // Abbrev [32] 0x19dc:0x18 DW_TAG_subprogram -; CHECK-NEXT:.b8 112 // DW_AT_name +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19e9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19ee:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19f4:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 65 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a77:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a07:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a7d:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a15:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a1b:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 240 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a9a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a9f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a2f:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1aa5:0x24 
DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a3c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a42:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a50:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a56:0xd DW_TAG_typedef -; CHECK-NEXT:.b32 6755 // DW_AT_type -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 101 // DW_AT_decl_line -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a63:0x2 DW_TAG_structure_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a65:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6771 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a73:0x22 DW_TAG_structure_type -; CHECK-NEXT:.b8 16 // DW_AT_byte_size -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a77:0xf DW_TAG_member -; CHECK-NEXT:.b8 113 // DW_AT_name -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 107 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a86:0xe DW_TAG_member -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a95:0xd DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aa2:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; 
CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 235 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ac3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ab6:0x17 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1ac9:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ac7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6861 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1acd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6866 // DW_AT_type -; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ad2:0x1 DW_TAG_subroutine_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ad3:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 9 // DW_AT_decl_file -; CHECK-NEXT:.b8 26 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 125 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ae7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ae7:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1aed:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 22 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1af6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; 
CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1afc:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 27 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b0b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1b11:0x2b DW_TAG_subprogram -; CHECK-NEXT:.b8 98 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 10 // DW_AT_decl_file -; CHECK-NEXT:.b8 20 // DW_AT_decl_line -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 66 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b22:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b27:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b31:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b36:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7014 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b3c:0x1 DW_TAG_pointer_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b3d:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b42:0x1 DW_TAG_const_type -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1b43:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6993 // DW_AT_type -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b0d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b13:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // DW_AT_decl_file -; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b51:0x15 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // 
DW_AT_byte_size -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b66:0x16 DW_TAG_typedef -; CHECK-NEXT:.b32 7036 // DW_AT_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 76 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b2f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b35:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b7c:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7041 // DW_AT_type -; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b81:0x10 DW_TAG_subroutine_type -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b86:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b91:0x1c DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b51:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bad:0x19 DW_TAG_subprogram -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b57:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 21 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6742 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bbb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // 
DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1bc6:0x12 DW_TAG_subprogram -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 31 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b71:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bd8:0x11 DW_TAG_subprogram -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b77:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1be3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1be9:0x17 DW_TAG_subprogram -; CHECK-NEXT:.b8 103 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b97:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 7168 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 67 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bfa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1baf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c00:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5641 // DW_AT_type -; CHECK-NEXT:.b8 
37 // Abbrev [37] 0x1c05:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1bb5:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 8 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 116 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c14:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1bd1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c1a:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1bd7:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 23 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6757 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 71 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c2e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1bf5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c34:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1bfb:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // 
DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c45:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c17:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c1c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3345 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c4b:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1c22:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 95 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c5b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c60:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c66:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 130 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c79:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7305 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c83:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c89:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7310 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c8e:0xb DW_TAG_base_type -; CHECK-NEXT:.b8 119 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c47:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1c4d:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; 
CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c99:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 194 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c73:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c78:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1c7e:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 98 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1caa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7305 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1caf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cba:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 113 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 47 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cc6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7014 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1cdb:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c97:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c9c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1ca2:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 22 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1cea:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cc8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ccd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1cd3:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 27 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cfc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cf4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cf9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cfe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1d07:0x12 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d04:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 111 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d13:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d1e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d19:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d24:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d34:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d39:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 61 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d49:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d53:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d59:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d40:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d46:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 6993 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d6a:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d6f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d74:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d7a:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 250 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d8b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d67:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d6c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d91:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 119 // DW_AT_name -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d72:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7604 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1db4:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7609 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1db9:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 7310 // DW_AT_type -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1dbe:0x1c DW_TAG_subprogram -; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 245 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x1dcf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7310 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d96:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1dda:0x78 DW_TAG_namespace -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1de5:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dec:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 7811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df3:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 7830 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dfa:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 7852 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e01:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 7879 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e08:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 7901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e0f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 7934 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e16:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 7994 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e1d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 8021 // DW_AT_import -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e24:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d9c:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e46:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1db4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1e52:0xf DW_TAG_typedef -; CHECK-NEXT:.b32 7777 // DW_AT_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1dba:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e61:0x22 DW_TAG_structure_type -; CHECK-NEXT:.b8 16 // DW_AT_byte_size -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 117 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e65:0xf DW_TAG_member -; CHECK-NEXT:.b8 113 // DW_AT_name -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e74:0xe DW_TAG_member -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1e83:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 45 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 37 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e90:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1dd4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e96:0x16 DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1dda:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 
114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 139 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ea6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1df4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eac:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1dfa:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 29 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 7762 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 252 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ebc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e12:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ec7:0x16 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1e18:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 36 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 42 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ed7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e32:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1edd:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 +; 
CHECK-NEXT:.b8 26 // Abbrev [26] 0x1e38:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 56 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1eee:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1efe:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e56:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1e5c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7968 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f10:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f15:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f20:0x1a DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f3a:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // 
DW_AT_decl_file -; CHECK-NEXT:.b8 172 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 150 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f4a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f4f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e78:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f55:0x1c DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x1e7e:0x22a DW_TAG_structure_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 8049 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f66:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f71:0xf DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 77 // DW_AT_decl_line +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1e9c:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1f80:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 78 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1eeb:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f9a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fa0:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 79 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fbc:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fc2:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1f3a:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 57 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fdc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fe2:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 95 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 80 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ffe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2004:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x1f89:0x49 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; 
CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 47 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 83 // DW_AT_decl_line +; CHECK-NEXT:.b32 8360 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2021:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2026:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1fcb:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8407 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x202c:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x1fd2:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2046:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // 
Abbrev [29] 0x1ff2:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8417 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x204c:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x1ff9:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2068:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2019:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8417 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x201f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8422 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x206e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x2025:0x43 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 61 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 150 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; 
CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2088:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x205c:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8407 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2062:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8422 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x208e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x2068:0x3f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 38 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b32 8427 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20a8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x20a0:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8407 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20ae:0x2e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x20a8:0x2f DW_TAG_structure_type +; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_byte_size +; CHECK-NEXT:.b8 14 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 190 // DW_AT_decl_line +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x20b2:0xc DW_TAG_member +; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20dc:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x20be:0xc DW_TAG_member +; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 219 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20f4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x20ca:0xc DW_TAG_member +; CHECK-NEXT:.b8 122 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20fa:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x20d7:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8412 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x20dc:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 7806 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x20e1:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7806 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x20e6:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 8412 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x20eb:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7806 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x20f0:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 7836 // DW_AT_specification +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x20f6:0x228 DW_TAG_structure_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 32 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2114:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x211a:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 88 // DW_AT_decl_line +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x2114:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2134:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x213a:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 200 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2152:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2158:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 145 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2172:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2178:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 14 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 89 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2190:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2196:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x2163:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 
+; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21b8:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x21b2:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 95 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 91 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21d8:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x2201:0x47 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 
108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21fe:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x221a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2220:0x2a DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 32 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 94 // DW_AT_decl_line +; CHECK-NEXT:.b32 8990 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x223a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x223f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2244:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2241:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9166 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x224a:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x2248:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 110 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2265:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2268:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9176 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2270:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x226f:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x228b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2290:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2296:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 ; 
CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 17 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x228f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9176 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2295:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22bc:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x229b:0x43 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22da:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22df:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22e5:0x28 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 104 // DW_AT_name -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 111 // DW_AT_name ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 61 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 
110 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2302:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2307:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x22d2:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9166 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x22d8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x230d:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x22de:0x3f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2329:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x232f:0x28 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 111 // DW_AT_name ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 38 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b32 9186 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x234c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2351:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; 
CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2316:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9166 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2357:0x24 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x231e:0x9d DW_TAG_structure_type +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 12 // DW_AT_byte_size +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x2328:0xd DW_TAG_member +; CHECK-NEXT:.b8 120 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x2335:0xd DW_TAG_member +; CHECK-NEXT:.b8 121 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x2342:0xd DW_TAG_member +; CHECK-NEXT:.b8 122 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x234f:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x235a:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9147 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2360:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2365:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x236a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x2370:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 166 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x2375:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x237b:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9147 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2381:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9152 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x237b:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x2387:0x33 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 125 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 167 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 9152 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2399:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x23b3:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9147 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x239f:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x23bb:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8990 // DW_AT_type +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x23c0:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 8360 // DW_AT_type +; CHECK-NEXT:.b8 117 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 127 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x23ce:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9171 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x23d3:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 8438 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x23d8:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8438 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x23dd:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 9171 // DW_AT_type +; 
CHECK-NEXT:.b8 8 // Abbrev [8] 0x23e2:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8438 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x23e7:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 8468 // DW_AT_specification +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x23ed:0x233 DW_TAG_structure_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23bf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23c5:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23e7:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2403:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2409:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x240c:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 78 ; CHECK-NEXT:.b8 50 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2423:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2429:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2443:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2449:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2461:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2467:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 ; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 67 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2483:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2489:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x245c:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 71 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24a7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24ad:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 73 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 12 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24c9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ce:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2125 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24d4:0x2b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 117 ; 
CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 68 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24f9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24ff:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x24ac:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 194 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2525:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2530:0x24 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 112 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 47 // 
DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2549:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x254e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2554:0x31 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 22 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 69 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x257a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x257f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2585:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x24fc:0x4a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; 
CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 27 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25a6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ab:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25b6:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 72 // DW_AT_decl_line +; CHECK-NEXT:.b32 8360 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x253f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9760 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x2546:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25d0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2567:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9770 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25d6:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x256e:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 61 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x258f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9770 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2595:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9775 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25f8:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x259b:0x44 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 61 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2619:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 
29 // Abbrev [29] 0x25d3:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9760 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x25d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9775 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2624:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x25df:0x40 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2643:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2648:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x264e:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 38 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b32 9780 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2666:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2618:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9760 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x266c:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x2620:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9765 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x2625:0x5 
DW_TAG_const_type +; CHECK-NEXT:.b32 9197 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x262a:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9197 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x262f:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 9765 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x2634:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9197 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x2639:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 9228 // DW_AT_specification +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 38 // Abbrev [38] 0x263f:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 37 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2686:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x268c:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x2653:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26a6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ac:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x265c:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x2665:0xb DW_TAG_formal_parameter +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 
2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26c4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b32 3345 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ca:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x2671:0xbf DW_TAG_subprogram +; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_AT_frame_base +; CHECK-NEXT:.b8 156 ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 42 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26e4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ea:0x24 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x269c:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 110 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x26a5:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2708:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x270e:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x26ae:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 +; 
CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 3345 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x26b7:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 150 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x272a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 3345 // DW_AT_type +; CHECK-NEXT:.b8 41 // Abbrev [41] 0x26c0:0x9 DW_TAG_variable +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 6 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x26c9:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 8432 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp0 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp1 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 6 // DW_AT_call_line +; CHECK-NEXT:.b8 11 // DW_AT_call_column +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x26e1:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 9191 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp1 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp2 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 6 // DW_AT_call_line +; CHECK-NEXT:.b8 24 // DW_AT_call_column +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x26f9:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 9785 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp2 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp3 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 6 // DW_AT_call_line +; CHECK-NEXT:.b8 37 // DW_AT_call_column +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x2711:0x1e DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 9791 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp9 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp10 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 8 // DW_AT_call_line +; CHECK-NEXT:.b8 5 // DW_AT_call_column +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2729:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9820 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT: } diff --git a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll index a6bfd8533a27..43cc586fff19 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll @@ -153,6 +153,15 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) ; CHECK-NEXT: .b8 2 // Abbreviation Code +; CHECK-NEXT: .b8 19 // DW_TAG_structure_type +; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 60 // DW_AT_declaration +; CHECK-NEXT: .b8 12 // DW_FORM_flag +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 3 // Abbreviation Code ; CHECK-NEXT: .b8 46 // DW_TAG_subprogram ; CHECK-NEXT: .b8 1 // DW_CHILDREN_yes ; CHECK-NEXT: .b8 17 // DW_AT_low_pc @@ -170,13 +179,11 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 59 // DW_AT_decl_line ; 
CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 73 // DW_AT_type -; CHECK-NEXT: .b8 19 // DW_FORM_ref4 ; CHECK-NEXT: .b8 63 // DW_AT_external ; CHECK-NEXT: .b8 12 // DW_FORM_flag ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 3 // Abbreviation Code +; CHECK-NEXT: .b8 4 // Abbreviation Code ; CHECK-NEXT: .b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no ; CHECK-NEXT: .b8 3 // DW_AT_name @@ -189,18 +196,20 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 19 // DW_FORM_ref4 ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 4 // Abbreviation Code -; CHECK-NEXT: .b8 36 // DW_TAG_base_type +; CHECK-NEXT: .b8 5 // Abbreviation Code +; CHECK-NEXT: .b8 52 // DW_TAG_variable ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no ; CHECK-NEXT: .b8 3 // DW_AT_name ; CHECK-NEXT: .b8 8 // DW_FORM_string -; CHECK-NEXT: .b8 62 // DW_AT_encoding +; CHECK-NEXT: .b8 58 // DW_AT_decl_file ; CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 11 // DW_AT_byte_size +; CHECK-NEXT: .b8 59 // DW_AT_decl_line ; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 73 // DW_AT_type +; CHECK-NEXT: .b8 16 // DW_FORM_ref_addr ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 5 // Abbreviation Code +; CHECK-NEXT: .b8 6 // Abbreviation Code ; CHECK-NEXT: .b8 46 // DW_TAG_subprogram ; CHECK-NEXT: .b8 1 // DW_CHILDREN_yes ; CHECK-NEXT: .b8 17 // DW_AT_low_pc @@ -218,42 +227,33 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 59 // DW_AT_decl_line ; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 73 // DW_AT_type +; CHECK-NEXT: .b8 19 // DW_FORM_ref4 ; CHECK-NEXT: .b8 63 // DW_AT_external ; CHECK-NEXT: .b8 12 // DW_FORM_flag ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 6 // Abbreviation Code -; CHECK-NEXT: .b8 52 // DW_TAG_variable +; CHECK-NEXT: .b8 7 // Abbreviation Code +; CHECK-NEXT: .b8 36 // DW_TAG_base_type ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no ; CHECK-NEXT: .b8 3 // DW_AT_name ; CHECK-NEXT: .b8 8 // DW_FORM_string -; CHECK-NEXT: .b8 58 // DW_AT_decl_file +; CHECK-NEXT: .b8 62 // DW_AT_encoding ; CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 59 // DW_AT_decl_line +; CHECK-NEXT: .b8 11 // DW_AT_byte_size ; CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 73 // DW_AT_type -; CHECK-NEXT: .b8 16 // DW_FORM_ref_addr -; CHECK-NEXT: .b8 0 // EOM(1) -; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 7 // Abbreviation Code -; CHECK-NEXT: .b8 19 // DW_TAG_structure_type -; CHECK-NEXT: .b8 0 // DW_CHILDREN_no -; CHECK-NEXT: .b8 3 // DW_AT_name -; CHECK-NEXT: .b8 8 // DW_FORM_string -; CHECK-NEXT: .b8 60 // DW_AT_declaration -; CHECK-NEXT: .b8 12 // DW_FORM_flag ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) ; CHECK-NEXT: .b8 0 // EOM(3) ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT: .b32 152 // Length of Unit +; CHECK-NEXT: .b32 159 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x91 DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x98 DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -301,7 +301,7 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 115 ; CHECK-NEXT: .b8 101 ; CHECK-NEXT: .b8 116 -; CHECK-NEXT: .b8 49 +; CHECK-NEXT: .b8 50 ; CHECK-NEXT: .b8 46 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 @@ -323,11 +323,15 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x64:0x30 DW_TAG_subprogram -; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x64:0x4 DW_TAG_structure_type +; CHECK-NEXT: .b8 65 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 1 // DW_AT_declaration +; CHECK-NEXT: .b8 3 // Abbrev [3] 0x68:0x3a DW_TAG_subprogram +; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base ; CHECK-NEXT: .b8 156 ; CHECK-NEXT: .b8 95 // DW_AT_MIPS_linkage_name @@ -335,38 +339,37 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 51 ; CHECK-NEXT: .b8 98 ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 114 -; CHECK-NEXT: .b8 105 +; CHECK-NEXT: .b8 122 +; CHECK-NEXT: .b8 49 +; CHECK-NEXT: .b8 65 ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 98 // DW_AT_name ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 114 +; CHECK-NEXT: .b8 122 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 1 // DW_AT_decl_file -; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b32 148 // DW_AT_type +; CHECK-NEXT: .b8 2 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x8a:0x9 DW_TAG_formal_parameter -; CHECK-NEXT: .b8 98 // DW_AT_name +; CHECK-NEXT: .b8 4 // Abbrev [4] 0x8b:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 97 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 1 // DW_AT_decl_file -; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b32 148 // DW_AT_type -; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 4 // Abbrev [4] 0x94:0x7 DW_TAG_base_type -; CHECK-NEXT: .b8 105 // DW_AT_name -; CHECK-NEXT: .b8 110 -; CHECK-NEXT: .b8 116 +; CHECK-NEXT: .b8 2 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b32 100 // DW_AT_type +; CHECK-NEXT: .b8 5 // Abbrev [5] 0x94:0xd DW_TAG_variable +; CHECK-NEXT: .b8 122 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 5 // DW_AT_encoding -; CHECK-NEXT: .b8 4 // DW_AT_byte_size +; CHECK-NEXT: .b8 2 // DW_AT_decl_file +; CHECK-NEXT: .b8 7 // DW_AT_decl_line +; CHECK-NEXT: .b64 .debug_info+311 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b32 159 // Length of Unit +; CHECK-NEXT: .b8 0 // End Of Children Mark +; CHECK-NEXT: .b32 152 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x98 DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x91 DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -414,7 +417,7 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 115 ; CHECK-NEXT: .b8 101 ; CHECK-NEXT: .b8 116 -; CHECK-NEXT: .b8 50 +; CHECK-NEXT: .b8 49 ; CHECK-NEXT: .b8 46 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 @@ -436,11 +439,11 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc -; CHECK-NEXT: .b8 5 // Abbrev [5] 0x64:0x3a DW_TAG_subprogram -; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc +; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT: .b8 6 // Abbrev [6] 0x64:0x30 DW_TAG_subprogram +; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base ; CHECK-NEXT: .b8 156 ; CHECK-NEXT: .b8 95 // DW_AT_MIPS_linkage_name @@ -448,34 +451,31 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 51 ; CHECK-NEXT: .b8 98 ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 122 -; CHECK-NEXT: .b8 49 -; CHECK-NEXT: .b8 65 +; CHECK-NEXT: .b8 114 +; CHECK-NEXT: .b8 105 ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 98 // DW_AT_name ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 122 +; CHECK-NEXT: .b8 114 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 2 // DW_AT_decl_file -; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 1 // DW_AT_decl_line +; CHECK-NEXT: .b32 148 // DW_AT_type ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x87:0x9 DW_TAG_formal_parameter -; CHECK-NEXT: .b8 97 // DW_AT_name -; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 2 // DW_AT_decl_file -; CHECK-NEXT: .b8 6 // DW_AT_decl_line -; CHECK-NEXT: .b32 158 // DW_AT_type -; CHECK-NEXT: .b8 6 // Abbrev [6] 0x90:0xd DW_TAG_variable -; CHECK-NEXT: .b8 122 // DW_AT_name +; CHECK-NEXT: .b8 4 // Abbrev [4] 0x8a:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 98 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 2 // DW_AT_decl_file -; CHECK-NEXT: .b8 7 // DW_AT_decl_line -; CHECK-NEXT: .b64 .debug_info+148 // DW_AT_type +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 1 // DW_AT_decl_line +; CHECK-NEXT: .b32 148 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 7 // Abbrev [7] 0x9e:0x4 DW_TAG_structure_type -; CHECK-NEXT: .b8 65 // DW_AT_name +; CHECK-NEXT: .b8 7 // Abbrev [7] 0x94:0x7 DW_TAG_base_type +; CHECK-NEXT: .b8 105 // DW_AT_name +; CHECK-NEXT: .b8 110 +; CHECK-NEXT: .b8 116 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 1 // DW_AT_declaration +; CHECK-NEXT: .b8 5 // DW_AT_encoding +; CHECK-NEXT: .b8 4 // DW_AT_byte_size ; CHECK-NEXT: .b8 0 // End Of Children Mark ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_loc { } diff --git a/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll b/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll index 483569a12deb..02590955262f 100644 --- a/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll +++ b/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll @@ -15,17 +15,17 @@ ; 3: DwarfUnit::addUInt() ; 4: addUInt(Block, 
(dwarf::Attribute)0, Form, Integer); -; CHECK: DW_AT_noreturn ; CHECK: DW_AT_name ("var") ; CHECK-NOT: DW_TAG_ ; CHECK: DW_AT_alignment ; CHECK: DW_AT_location (DW_OP_addr 0x0) +; CHECK: DW_AT_noreturn ; -; STRICT-NOT: DW_AT_noreturn ; STRICT: DW_AT_name ("var") ; STRICT-NOT: DW_AT_alignment ; STRICT-NOT: DW_TAG_ ; STRICT: DW_AT_location (DW_OP_addr 0x0) +; STRICT-NOT: DW_AT_noreturn @_ZL3var = internal global i32 0, align 16, !dbg !0 diff --git a/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll index 9c33051ea75e..14793460e14c 100644 --- a/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll +++ b/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll @@ -46,17 +46,17 @@ attributes #1 = { nounwind readnone } ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "LOC") +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "GLB") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") +; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_line [DW_FORM_data1] (4) +; CHECK: DW_AT_decl_line [DW_FORM_data1] (1) ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "GLB") +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "LOC") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") +; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_line [DW_FORM_data1] (1) +; CHECK: DW_AT_decl_line [DW_FORM_data1] (4) diff --git a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll index 82af77a1ef99..495f0f646628 100644 --- a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll +++ b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll @@ -21,6 +21,10 @@ ; CHECK: .debug_info contents: +; CHECK: DW_TAG_subroutine_type [[subroutine_abbrev]] * +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] {{.*}} +; CHECK-NEXT: DW_AT_calling_convention [DW_FORM_data1] (DW_CC_BORLAND_msfastcall) + ; CHECK: DW_TAG_subprogram [{{.*}}] * ; CHECK: DW_AT_low_pc ; CHECK: DW_AT_high_pc @@ -33,10 +37,6 @@ ; CHECK: DW_AT_type ; CHECK: DW_AT_external -; CHECK: DW_TAG_subroutine_type [[subroutine_abbrev]] * -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] {{.*}} -; CHECK-NEXT: DW_AT_calling_convention [DW_FORM_data1] (DW_CC_BORLAND_msfastcall) - ; ModuleID = 't.cpp' source_filename = "t.cpp" target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" diff --git a/llvm/test/DebugInfo/X86/align_cpp11.ll b/llvm/test/DebugInfo/X86/align_cpp11.ll index 537b7b4ff21e..14cfda7c06da 100644 --- a/llvm/test/DebugInfo/X86/align_cpp11.ll +++ b/llvm/test/DebugInfo/X86/align_cpp11.ll @@ -33,21 +33,6 @@ ; auto Lambda = [i](){}; ; } -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name{{.*}}"i" -; CHECK: DW_AT_alignment{{.*}}32 -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_class_type -; CHECK: DW_TAG_member -; CHECK: DW_AT_name{{.*}}"i" -; CHECK: DW_AT_alignment{{.*}}32 -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_formal_parameter - ; CHECK: DW_TAG_class_type ; CHECK: DW_AT_name{{.*}}"C0" ; CHECK: DW_AT_alignment{{.*}}64 @@ 
-68,6 +53,21 @@ ; CHECK: DW_TAG_enumerator ; CHECK: DW_TAG_enumerator +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name{{.*}}"i" +; CHECK: DW_AT_alignment{{.*}}32 +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_class_type +; CHECK: DW_TAG_member +; CHECK: DW_AT_name{{.*}}"i" +; CHECK: DW_AT_alignment{{.*}}32 +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_formal_parameter + ; CHECK: DW_TAG_class_type ; CHECK: DW_AT_name{{.*}}"C1" ; CHECK: DW_TAG_member diff --git a/llvm/test/DebugInfo/X86/align_objc.ll b/llvm/test/DebugInfo/X86/align_objc.ll index 375567096fc0..bc0abbdaa103 100644 --- a/llvm/test/DebugInfo/X86/align_objc.ll +++ b/llvm/test/DebugInfo/X86/align_objc.ll @@ -16,14 +16,7 @@ ; } ; CHECK: DW_TAG_compile_unit - -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name{{.*}}"i" -; CHECK: DW_AT_alignment{{.*}}32 ; CHECK: DW_TAG_variable - ; CHECK: DW_TAG_typedef ; CHECK: DW_AT_name{{.*}}"S0" @@ -32,6 +25,12 @@ ; CHECK: DW_TAG_member ; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name{{.*}}"i" +; CHECK: DW_AT_alignment{{.*}}32 + ; CHECK: DW_TAG_typedef ; CHECK: DW_AT_name{{.*}}"S1" ; CHECK: DW_TAG_structure_type diff --git a/llvm/test/DebugInfo/X86/arange-and-stub.ll b/llvm/test/DebugInfo/X86/arange-and-stub.ll index 9264fab1a8a4..db1837e02141 100644 --- a/llvm/test/DebugInfo/X86/arange-and-stub.ll +++ b/llvm/test/DebugInfo/X86/arange-and-stub.ll @@ -5,7 +5,7 @@ ; CHECK: .L_ZTId.DW.stub: ; CHECK: .data -; CHECK-NEXT: .Lsec_end1: +; CHECK-NEXT: .Lsec_end0: source_filename = "test/DebugInfo/X86/arange-and-stub.ll" target triple = "x86_64-linux-gnu" diff --git a/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll b/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll index 94cde895f35a..3e6c6d269082 100644 --- a/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll +++ b/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll @@ -2,7 +2,6 @@ ; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s ; Check that any type can have a vtable holder. 
-; CHECK: DW_TAG_structure_type ; CHECK: [[SP:.*]]: DW_TAG_structure_type ; CHECK-NOT: TAG ; CHECK: DW_AT_containing_type [DW_FORM_ref4] diff --git a/llvm/test/DebugInfo/X86/debug-info-access.ll b/llvm/test/DebugInfo/X86/debug-info-access.ll index fdc96dc0f421..2f21dfb3fe0e 100644 --- a/llvm/test/DebugInfo/X86/debug-info-access.ll +++ b/llvm/test/DebugInfo/X86/debug-info-access.ll @@ -64,10 +64,6 @@ ; F f; ; H h; -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name {{.*}}"free") -; CHECK-NOT: DW_AT_accessibility - ; CHECK: DW_TAG_member ; CHECK: DW_AT_name {{.*}}"union_priv") ; CHECK-NOT: DW_TAG @@ -128,6 +124,10 @@ ; CHECK: DW_AT_name ("I") ; CHECK: DW_AT_accessibility (DW_ACCESS_private) +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name {{.*}}"free") +; CHECK-NOT: DW_AT_accessibility + %union.U = type { i32 } %struct.A = type { i8 } %class.B = type { i8 } diff --git a/llvm/test/DebugInfo/X86/debug-info-blocks.ll b/llvm/test/DebugInfo/X86/debug-info-blocks.ll index 0173f856f0de..0914fa68620c 100644 --- a/llvm/test/DebugInfo/X86/debug-info-blocks.ll +++ b/llvm/test/DebugInfo/X86/debug-info-blocks.ll @@ -5,6 +5,10 @@ ; rdar://problem/9279956 ; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ) +; CHECK: [[A:.*]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_APPLE_objc_complete_type +; CHECK-NEXT: DW_AT_name{{.*}}"A" + ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram @@ -31,10 +35,6 @@ ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_artificial -; CHECK: [[A:.*]]: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_APPLE_objc_complete_type -; CHECK-NEXT: DW_AT_name{{.*}}"A" - ; CHECK: [[APTR]]: DW_TAG_pointer_type ; CHECK-NEXT: {[[A]]} diff --git a/llvm/test/DebugInfo/X86/debug-info-static-member.ll b/llvm/test/DebugInfo/X86/debug-info-static-member.ll index 915e48b3c03a..5e2f46686bd6 100644 --- a/llvm/test/DebugInfo/X86/debug-info-static-member.ll +++ b/llvm/test/DebugInfo/X86/debug-info-static-member.ll @@ -106,8 +106,6 @@ attributes #1 = { nounwind readnone } ; (for variables) or DW_AT_const_value (for constants). ; ; PRESENT: .debug_info contents: -; PRESENT: DW_TAG_subprogram -; PRESENT: DW_TAG_variable ; PRESENT: DW_TAG_variable ; PRESENT-NEXT: DW_AT_specification {{.*}} "a" ; PRESENT-NEXT: DW_AT_location @@ -158,8 +156,6 @@ attributes #1 = { nounwind readnone } ; For Darwin gdb: ; DARWINP: .debug_info contents: -; DARWINP: DW_TAG_subprogram -; DARWINP: DW_TAG_variable ; DARWINP: DW_TAG_variable ; DARWINP-NEXT: DW_AT_specification {{.*}} "a" ; DARWINP-NEXT: DW_AT_location diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.mir b/llvm/test/DebugInfo/X86/debug-loc-offset.mir index 72267ebcb417..95f524fc162c 100644 --- a/llvm/test/DebugInfo/X86/debug-loc-offset.mir +++ b/llvm/test/DebugInfo/X86/debug-loc-offset.mir @@ -31,22 +31,6 @@ # # Checking that we have two compile units with two sets of high/lo_pc. 
# CHECK: .debug_info contents -# -# CHECK: DW_TAG_compile_unit -# CHECK: DW_AT_low_pc {{.*}} (0x00000000 ".text") -# CHECK: DW_AT_high_pc -# -# CHECK: DW_TAG_subprogram -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_linkage_name [DW_FORM_strp]{{.*}}"_Z3bari" -# CHECK-NOT: {{DW_TAG|NULL}} -# CHECK: DW_TAG_formal_parameter -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}} -# CHECK-NEXT: [0x00000000, 0x0000000a) ".text": DW_OP_consts +0, DW_OP_stack_value -# CHECK-NEXT: [0x0000000a, 0x00000017) ".text": DW_OP_consts +1, DW_OP_stack_value) -# CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"b" - # CHECK: DW_TAG_compile_unit # CHECK: DW_AT_low_pc {{.*}} (0x00000020 ".text") # CHECK: DW_AT_high_pc @@ -66,6 +50,21 @@ # CHECK: DW_AT_location [DW_FORM_exprloc] # CHECK-NOT: DW_AT_location # +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_low_pc {{.*}} (0x00000000 ".text") +# CHECK: DW_AT_high_pc +# +# CHECK: DW_TAG_subprogram +# CHECK-NOT: DW_TAG +# CHECK: DW_AT_linkage_name [DW_FORM_strp]{{.*}}"_Z3bari" +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_TAG_formal_parameter +# CHECK-NOT: DW_TAG +# CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}} +# CHECK-NEXT: [0x00000000, 0x0000000a) ".text": DW_OP_consts +0, DW_OP_stack_value +# CHECK-NEXT: [0x0000000a, 0x00000017) ".text": DW_OP_consts +1, DW_OP_stack_value) +# CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"b" +# # CHECK: .debug_loc contents: # CHECK: 0x00000000: # CHECK-NEXT: (0x00000000, 0x0000000a): DW_OP_consts +0, DW_OP_stack_value diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges.ll b/llvm/test/DebugInfo/X86/dwarf-aranges.ll index b65eba451f4d..5358a30c6a06 100644 --- a/llvm/test/DebugInfo/X86/dwarf-aranges.ll +++ b/llvm/test/DebugInfo/X86/dwarf-aranges.ll @@ -8,22 +8,22 @@ ; -- alignment -- ; CHECK-NEXT: .zero 4,255 -; - it should have made one span covering all functions in this CU. -; CHECK-NEXT: .quad .Lfunc_begin0 -; CHECK-NEXT: .quad .Lsec_end0-.Lfunc_begin0 - ; - it should have made one span covering all vars in this CU. ; CHECK-NEXT: .quad some_data -; CHECK-NEXT: .quad .Lsec_end1-some_data +; CHECK-NEXT: .quad .Lsec_end0-some_data ; - it should have made one span covering all vars in this CU. ; CHECK-NEXT: .quad some_other -; CHECK-NEXT: .quad .Lsec_end2-some_other +; CHECK-NEXT: .quad .Lsec_end1-some_other ; - it should have made one span for each symbol. ; CHECK-NEXT: .quad some_bss ; CHECK-NEXT: .quad 4 +; - it should have made one span covering all functions in this CU. +; CHECK-NEXT: .quad .Lfunc_begin0 +; CHECK-NEXT: .quad .Lsec_end2-.Lfunc_begin0 + ; -- finish -- ; CHECK-NEXT: # ARange terminator diff --git a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll index 75faee0ac2b5..2af2b86590a2 100644 --- a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll +++ b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll @@ -17,10 +17,10 @@ ; This assumes the variable will appear before the function. 
; LINKAGE1: .section .debug_info -; LINKAGE1: DW_TAG_subprogram +; LINKAGE1: DW_TAG_variable ; LINKAGE1-NOT: DW_TAG ; LINKAGE1: {{DW_AT_(MIPS_)?linkage_name}} -; LINKAGE1: DW_TAG_variable +; LINKAGE1: DW_TAG_subprogram ; LINKAGE1-NOT: DW_TAG ; LINKAGE1: {{DW_AT_(MIPS_)?linkage_name}} ; LINKAGE1: .section diff --git a/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll b/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll index 1a98fbe267f5..2d4937ee7f9f 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll +++ b/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll @@ -3,21 +3,21 @@ ; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s +; CHECK: [[MYMOD:0x[0-9a-f]+]]: DW_TAG_module +; CHECK: DW_AT_name ("mymod") +; CHECK: [[VAR1:0x[0-9a-f]+]]: DW_TAG_variable +; CHECK: DW_AT_name ("var1") + ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("main") ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("use_renamed") ; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_import ([[MYMOD:0x[0-9a-f]+]]) +; CHECK: DW_AT_import ([[MYMOD]]) ; CHECK: DW_TAG_imported_declaration -; CHECK: DW_AT_import ([[VAR1:0x[0-9a-f]+]]) +; CHECK: DW_AT_import ([[VAR1]]) ; CHECK: DW_AT_name ("var4") -; CHECK: [[MYMOD]]: DW_TAG_module -; CHECK: DW_AT_name ("mymod") -; CHECK: [[VAR1]]: DW_TAG_variable -; CHECK: DW_AT_name ("var1") - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;This test case is generated from ;;module mymod diff --git a/llvm/test/DebugInfo/X86/generate-odr-hash.ll b/llvm/test/DebugInfo/X86/generate-odr-hash.ll index f9153f9c9756..9fa954cd2485 100644 --- a/llvm/test/DebugInfo/X86/generate-odr-hash.ll +++ b/llvm/test/DebugInfo/X86/generate-odr-hash.ll @@ -54,7 +54,12 @@ ; FISSION-LABEL: .debug_info.dwo contents: ; CHECK: Compile Unit: length = [[CU_SIZE:[0-9a-f]+]] -; CHECK: DW_TAG_structure_type +; CHECK: [[BAR:^0x........]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_declaration +; CHECK-NEXT: DW_AT_signature {{.*}} (0x1d02f3be30cc5688) +; CHECK: [[FLUFFY:^0x........]]: DW_TAG_class_type +; CHECK-NEXT: DW_AT_declaration +; CHECK-NEXT: DW_AT_signature {{.*}} (0xb04af47397402e77) ; Ensure the CU-local type 'walrus' is not placed in a type unit. 
; CHECK: [[WALRUS:^0x........]]: DW_TAG_structure_type @@ -63,13 +68,6 @@ ; CHECK-NEXT: DW_AT_decl_file ; CHECK-NEXT: DW_AT_decl_line -; CHECK: [[BAR:^0x........]]: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_signature {{.*}} (0x1d02f3be30cc5688) -; CHECK: [[FLUFFY:^0x........]]: DW_TAG_class_type -; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_signature {{.*}} (0xb04af47397402e77) - ; CHECK: [[WOMBAT:^0x........]]: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_declaration ; CHECK-NEXT: DW_AT_signature {{.*}} (0xfd756cee88f8a118) @@ -123,10 +121,10 @@ ; CHECK-LABEL: .debug_line contents: ; CHECK: Line table prologue ; CHECK-NOT: file_names[ -; CHECK: file_names[ -; CHECK-NEXT: name: "bar.cpp" ; SINGLE: file_names[ ; SINGLE-NEXT: name: "bar.h" +; CHECK: file_names[ +; CHECK-NEXT: name: "bar.cpp" ; CHECK-NOT: file_names[ ; FISSION: .debug_line.dwo contents: diff --git a/llvm/test/DebugInfo/X86/gnu-public-names.ll b/llvm/test/DebugInfo/X86/gnu-public-names.ll index de44b4341868..d19358ee6e58 100644 --- a/llvm/test/DebugInfo/X86/gnu-public-names.ll +++ b/llvm/test/DebugInfo/X86/gnu-public-names.ll @@ -77,6 +77,9 @@ ; CHECK: DW_AT_GNU_pubnames (true) ; CHECK-NOT: DW_AT_GNU_pubtypes [ +; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable +; CHECK: DW_AT_specification {{.*}} "static_member_variable" + ; CHECK: [[C:0x[0-9a-f]+]]: DW_TAG_structure_type ; CHECK: DW_AT_name ("C") ; CHECK: DW_TAG_member @@ -95,23 +98,11 @@ ; CHECK: DW_AT_name ("int") ; CHECK: DW_TAG_pointer_type -; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_specification {{.*}} "_ZN1C15member_functionEv" -; CHECK: DW_TAG_formal_parameter -; CHECK: NULL - -; CHECK: [[STATIC_MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_specification {{.*}} "_ZN1C22static_member_functionEv" - -; CHECK: [[GLOBAL_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_linkage_name -; CHECK: DW_AT_name ("global_function") +; CHECK: [[GLOB_VAR:0x[0-9a-f]+]]: DW_TAG_variable +; CHECK: DW_AT_name ("global_variable") ; CHECK: [[NS:0x[0-9a-f]+]]: DW_TAG_namespace ; CHECK: DW_AT_name ("ns") -; CHECK: [[GLOB_NS_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_linkage_name -; CHECK: DW_AT_name ("global_namespace_function") ; CHECK: [[GLOB_NS_VAR:0x[0-9a-f]+]]: DW_TAG_variable ; CHECK: DW_AT_name ("global_namespace_variable") ; CHECK-NOT: DW_AT_specification @@ -127,6 +118,9 @@ ; CHECK: DW_TAG_member ; CHECK: NULL ; CHECK: DW_TAG_variable +; CHECK: [[GLOB_NS_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_linkage_name +; CHECK: DW_AT_name ("global_namespace_function") ; CHECK: NULL ; CHECK: DW_TAG_subprogram @@ -136,13 +130,6 @@ ; CHECK: DW_AT_location ; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable -; CHECK: DW_AT_specification {{.*}} "static_member_variable" - -; CHECK: [[GLOB_VAR:0x[0-9a-f]+]]: DW_TAG_variable -; CHECK: DW_AT_name ("global_variable") - ; CHECK: [[ANON:.*]]: DW_TAG_namespace ; CHECK-NOT: DW_AT_name ; CHECK: [[ANON_I:.*]]: DW_TAG_variable @@ -184,6 +171,20 @@ ; CHECK: NULL ; CHECK: DW_TAG_imported_declaration + +; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_specification {{.*}} "_ZN1C15member_functionEv" +; CHECK: DW_TAG_formal_parameter +; CHECK: NULL + +; CHECK: [[STATIC_MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_specification {{.*}} "_ZN1C22static_member_functionEv" + +; CHECK: [[GLOBAL_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_linkage_name +; CHECK: DW_AT_name 
("global_function") + +; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_pointer_type ; CHECK: DW_TAG_pointer_type ; CHECK: NULL diff --git a/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll b/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll index 38a096c7bfc9..13a8c0acb277 100644 --- a/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll +++ b/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll @@ -32,6 +32,8 @@ ; CHECK: DW_TAG_lexical_block ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_variable +; CHECK-NOT: {{DW_TAG|NULL}} +; CHECK: DW_TAG_imported_module ;; Abstract "bar" function ; CHECK: [[Offset_bar]]: DW_TAG_subprogram @@ -58,6 +60,9 @@ ; CHECK: DW_TAG_lexical_block ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_variable +; CHECK-NOT: {{DW_TAG|NULL}} +; CHECK: DW_TAG_imported_module + ; Function Attrs: alwaysinline nounwind define i32 @_Z3barv() #0 !dbg !4 { diff --git a/llvm/test/DebugInfo/X86/linkage-name.ll b/llvm/test/DebugInfo/X86/linkage-name.ll index e43cccfb5fcf..f5c82b1759a3 100644 --- a/llvm/test/DebugInfo/X86/linkage-name.ll +++ b/llvm/test/DebugInfo/X86/linkage-name.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj ; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s -; CHECK: DW_TAG_subprogram [8] * +; CHECK: DW_TAG_subprogram [9] * ; CHECK-NOT: DW_AT_{{(MIPS_)?}}linkage_name ; CHECK: DW_AT_specification diff --git a/llvm/test/DebugInfo/X86/namelist1.ll b/llvm/test/DebugInfo/X86/namelist1.ll index 149d6cbbe318..57316a806b71 100644 --- a/llvm/test/DebugInfo/X86/namelist1.ll +++ b/llvm/test/DebugInfo/X86/namelist1.ll @@ -4,11 +4,10 @@ ; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu %s -filetype=obj -o %t.o ; RUN: llvm-dwarfdump %t.o | FileCheck %s ; -; CHECK: [[ITEM2:0x.+]]: DW_TAG_variable -; CHECK: DW_AT_name ("b") -; CHECK: DW_TAG_variable ; CHECK: [[ITEM1:0x.+]]: DW_TAG_variable ; CHECK: DW_AT_name ("a") +; CHECK: [[ITEM2:0x.+]]: DW_TAG_variable +; CHECK: DW_AT_name ("b") ; CHECK: DW_TAG_namelist ; CHECK: DW_AT_name ("nml") ; CHECK: DW_TAG_namelist_item diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll index 673e982b0534..479f2f605421 100644 --- a/llvm/test/DebugInfo/X86/sret.ll +++ b/llvm/test/DebugInfo/X86/sret.ll @@ -3,8 +3,8 @@ ; Based on the debuginfo-tests/sret.cpp code. -; CHECK-DWO: DW_AT_GNU_dwo_id (0x46a03e2a3394ce47) -; CHECK-DWO: DW_AT_GNU_dwo_id (0x46a03e2a3394ce47) +; CHECK-DWO: DW_AT_GNU_dwo_id (0xa58a336e896549f1) +; CHECK-DWO: DW_AT_GNU_dwo_id (0xa58a336e896549f1) ; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,FASTISEL %s ; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,SDAG %s diff --git a/llvm/test/DebugInfo/X86/subprogram-across-cus.ll b/llvm/test/DebugInfo/X86/subprogram-across-cus.ll index a265b8e2bc30..345342aecf3c 100644 --- a/llvm/test/DebugInfo/X86/subprogram-across-cus.ll +++ b/llvm/test/DebugInfo/X86/subprogram-across-cus.ll @@ -35,12 +35,12 @@ ; Check that there are no verifier failures, and that the SP for "main" appears ; in the correct CU. 
; CHECK-LABEL: DW_TAG_compile_unit +; CHECK: DW_AT_name ("1.cpp") +; CHECK-NOT: DW_AT_name ("main") +; CHECK-LABEL: DW_TAG_compile_unit ; CHECK: DW_AT_name ("2.cpp") ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("main") -; CHECK-LABEL: DW_TAG_compile_unit -; CHECK: DW_AT_name ("1.cpp") -; CHECK-NOT: DW_AT_name ("main") source_filename = "ld-temp.o" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/X86/template.ll b/llvm/test/DebugInfo/X86/template.ll index 8a071cc81389..6ac7f2bc0a57 100644 --- a/llvm/test/DebugInfo/X86/template.ll +++ b/llvm/test/DebugInfo/X86/template.ll @@ -15,10 +15,26 @@ ; VERIFY-NOT: error: +; CHECK: [[INT:0x[0-9a-f]*]]:{{ *}}DW_TAG_base_type +; CHECK-NEXT: DW_AT_name{{.*}} = "int" + +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name{{.*}}"y_impl" +; CHECK-NOT: {{TAG|NULL}} +; CHECK: DW_TAG_template_type_parameter + +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name{{.*}}"var" +; CHECK-NOT: NULL +; CHECK: DW_TAG_template_type_parameter +; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} +; CHECK-NEXT: DW_AT_name{{.*}}= "T" + + ; CHECK: DW_AT_name{{.*}}"func<3, &glbl, y_impl, nullptr, E, 1, 2>" ; CHECK-NOT: NULL ; CHECK: DW_TAG_template_value_parameter -; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT:0x[0-9a-f]*]]} +; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} ; CHECK-NEXT: DW_AT_name{{.*}}= "x" ; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(3) @@ -55,21 +71,6 @@ ; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} ; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(2) -; CHECK: [[INT]]:{{ *}}DW_TAG_base_type -; CHECK-NEXT: DW_AT_name{{.*}} = "int" - -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name{{.*}}"y_impl" -; CHECK-NOT: {{TAG|NULL}} -; CHECK: DW_TAG_template_type_parameter - -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_name{{.*}}"var" -; CHECK-NOT: NULL -; CHECK: DW_TAG_template_type_parameter -; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} -; CHECK-NEXT: DW_AT_name{{.*}}= "T" - ; CHECK: [[INTPTR]]:{{ *}}DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type{{.*}} => {[[INT]]} diff --git a/llvm/test/DebugInfo/X86/tls.ll b/llvm/test/DebugInfo/X86/tls.ll index 33ab5559fcb9..ecb1d93bbd28 100644 --- a/llvm/test/DebugInfo/X86/tls.ll +++ b/llvm/test/DebugInfo/X86/tls.ll @@ -30,45 +30,33 @@ ; that here instead of raw assembly printing ; FISSION: .section .debug_info.dwo, +; 3 bytes of data in this DW_FORM_exprloc representation of the location of 'tls' +; FISSION: .byte 3{{ *}}# DW_AT_location +; DW_OP_GNU_const_index (0xfx == 252) to refer to the debug_addr table +; FISSION-NEXT: .byte 252 +; an index of zero into the debug_addr table +; FISSION-NEXT: .byte 0 + ; SINGLE: .section .debug_info, ; DARWIN: .section {{.*}}debug_info, -; SINGLE-64: DW_TAG_variable ; 10 bytes of data in this DW_FORM_exprloc representation of the location of 'tls' ; SINGLE-64: .byte 10 # DW_AT_location ; DW_OP_const8u (0x0e == 14) of address ; SINGLE-64-NEXT: .byte 14 ; SINGLE-64-NEXT: .quad tls@DTPOFF -; DARWIN: DW_TAG_variable ; DARWIN: .byte 10 ## DW_AT_location ; DW_OP_const8u (0x0e == 14) of address ; DARWIN-NEXT: .byte 14 ; DARWIN-NEXT: .quad _tls -; SINGLE-32: DW_TAG_variable ; 6 bytes of data in 32-bit mode ; SINGLE-32: .byte 6 # DW_AT_location ; DW_OP_const4u (0x0e == 12) of address ; SINGLE-32-NEXT: .byte 12 ; SINGLE-32-NEXT: .long tls@DTPOFF -; FISSION: DW_TAG_template_value_parameter -; FISSION: .byte 3 # DW_AT_location -; DW_OP_GNU_addr_index -; FISSION-NEXT: .byte 251 -; FISSION-NEXT: .byte 2 -; DW_OP_stack_value -; 
FISSION-NEXT: .byte 159 - -; FISSION: DW_TAG_variable -; 3 bytes of data in this DW_FORM_exprloc representation of the location of 'tls' -; FISSION: .byte 3{{ *}}# DW_AT_location -; DW_OP_GNU_const_index (0xfx == 252) to refer to the debug_addr table -; FISSION-NEXT: .byte 252 -; an index of 1 into the debug_addr table -; FISSION-NEXT: .byte 1 - ; DW_OP_GNU_push_tls_address ; GNUOP-NEXT: .byte 224 ; DW_OP_form_tls_address @@ -78,12 +66,19 @@ ; FISSION: .byte 2 # DW_AT_location ; DW_OP_GNU_addr_index ; FISSION-NEXT: .byte 251 -; FISSION-NEXT: .byte 2 +; FISSION-NEXT: .byte 1 + +; FISSION: DW_TAG_template_value_parameter +; FISSION: .byte 3 # DW_AT_location +; DW_OP_GNU_addr_index +; FISSION-NEXT: .byte 251 +; FISSION-NEXT: .byte 1 +; DW_OP_stack_value +; FISSION-NEXT: .byte 159 ; check that the expected TLS address description is the first thing in the debug_addr section ; FISSION: .section .debug_addr ; FISSION-NEXT: .Laddr_table_base0: -; FISSION-NEXT: .quad .Lfunc_begin0 ; FISSION-NEXT: .quad tls@DTPOFF ; FISSION-NEXT: .quad glbl ; FISSION-NOT: .quad glbl diff --git a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll b/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll deleted file mode 100644 index 78baf7987157..000000000000 --- a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll +++ /dev/null @@ -1,133 +0,0 @@ -; RUN: llc %s -mtriple=x86_64-linux-gnu -generate-type-units -o - -filetype=obj \ -; RUN: | llvm-dwarfdump -o - - \ -; RUN: | FileCheck %s - -;; PR51087 -;; Check that types that are not referenecd in the CU and have type units -;; do not get unecessary skeleton DIEs in the CU, and that types that have -;; type units but are referenced in the CU still get CU DIEs. -;; -;; In the test (source below): -;; Unused is not used anywhere and should get only a type unit. -;; -;; Outer is used (by global O) so should get a CU DIE, but none of its nested -;; types (Inner, then nested again Enum1 and Enum2) are used so they should not. -;; -;; Ex is not used directly, but its nested type Enum is, so both should get -;; a DIE in the CU. Retained types and enums are emitted after globals, so -;; having Enum used by a local variable lets us check that type DIEs emitted -;; for types that initially only need type units still get a CU DIE later on -;; if required. -;; -;; Generated with `-Xclang -debug-info-kind=unused-types` (for Unused) from: -;; $ cat test.cpp -;; struct Unused {}; -;; -;; class Outer { -;; public: -;; struct Inner { -;; enum Enum1 { X }; -;; enum Enum2 { Y }; -;; Enum1 one; -;; Enum2 two; -;; }; -;; -;; Inner n; -;; } O; -;; -;; struct Ex { enum Enum { X }; }; -;; void fun() { Ex::Enum local; } - -;; Note: The types without a type_signature match here should only get type -;; units and no CU DIE. -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Outer'{{.+}} type_signature = [[SIG_Outer:[0-9a-fx]+]] -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Inner'{{.+}} -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum1'{{.+}} -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum2'{{.+}} -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum'{{.+}} type_signature = [[SIG_Enum:[0-9a-fx]+]] -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Ex'{{.+}} type_signature = [[SIG_Ex:[0-9a-fx]+]] -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Unused'{{.+}} - -;; Check the CU references and skeleton DIEs are emitted correctly. -;; The check-not directives check that Unused doesn't get a DIE in the CU. 
-; CHECK: DW_TAG_compile_unit -; CHECK-NOT: DW_AT_signature -; CHECK: DW_AT_type ([[DIE_Enum:[0-9a-fx]+]] "Ex::Enum") -; CHECK-NOT: DW_AT_signature -; CHECK: DW_AT_type ([[DIE_Outer:[0-9a-fx]+]] "Outer") -; CHECK-NOT: DW_AT_signature - -;; Outer is referenced in the CU so it needs a DIE, but its nested enums are not -;; and so should not have DIEs here. -; CHECK: [[DIE_Outer]]: DW_TAG_class_type -; CHECK-NEXT: DW_AT_declaration (true) -; CHECK-NEXT: DW_AT_signature ([[SIG_Outer]]) -; CHECK-EMPTY: - -;; Ex is not referenced in the CU but its nested type, Enum, is. -; CHECK-NEXT: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_declaration (true) -; CHECK-NEXT: DW_AT_signature ([[SIG_Ex]]) -; CHECK-EMPTY: -; CHECK-NEXT: [[DIE_Enum]]: DW_TAG_enumeration_type -; CHECK-NEXT: DW_AT_declaration (true) -; CHECK-NEXT: DW_AT_signature ([[SIG_Enum]]) - -;; One last check that Unused has no CU DIE. -; CHECK-NOT: DW_AT_signature - -%class.Outer = type { %"struct.Outer::Inner" } -%"struct.Outer::Inner" = type { i32, i32 } - -@O = dso_local global %class.Outer zeroinitializer, align 4, !dbg !0 - -define dso_local void @_Z3funv() !dbg !31 { -entry: - %local = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %local, metadata !34, metadata !DIExpression()), !dbg !35 - ret void, !dbg !36 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!25, !26, !27, !28, !29} -!llvm.ident = !{!30} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "O", scope: !2, file: !3, line: 13, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !22, globals: !24, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "test.cpp", directory: "/") -!4 = !{!5, !13, !19} -!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum1", scope: !6, file: !3, line: 6, baseType: !14, size: 32, elements: !17, identifier: "_ZTSN5Outer5Inner5Enum1E") -!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Inner", scope: !7, file: !3, line: 5, size: 64, flags: DIFlagTypePassByValue, elements: !10, identifier: "_ZTSN5Outer5InnerE") -!7 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "Outer", file: !3, line: 3, size: 64, flags: DIFlagTypePassByValue, elements: !8, identifier: "_ZTS5Outer") -!8 = !{!9} -!9 = !DIDerivedType(tag: DW_TAG_member, name: "n", scope: !7, file: !3, line: 12, baseType: !6, size: 64, flags: DIFlagPublic) -!10 = !{!11, !12} -!11 = !DIDerivedType(tag: DW_TAG_member, name: "one", scope: !6, file: !3, line: 8, baseType: !5, size: 32) -!12 = !DIDerivedType(tag: DW_TAG_member, name: "two", scope: !6, file: !3, line: 9, baseType: !13, size: 32, offset: 32) -!13 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum2", scope: !6, file: !3, line: 7, baseType: !14, size: 32, elements: !15, identifier: "_ZTSN5Outer5Inner5Enum2E") -!14 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) -!15 = !{!16} -!16 = !DIEnumerator(name: "Y", value: 0, isUnsigned: true) -!17 = !{!18} -!18 = !DIEnumerator(name: "X", value: 0, isUnsigned: true) -!19 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum", scope: !20, file: !3, line: 15, baseType: !14, size: 32, elements: !17, identifier: "_ZTSN2Ex4EnumE") -!20 = distinct !DICompositeType(tag: 
DW_TAG_structure_type, name: "Ex", file: !3, line: 15, size: 8, flags: DIFlagTypePassByValue, elements: !21, identifier: "_ZTS2Ex") -!21 = !{} -!22 = !{!23, !7, !6, !20} -!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Unused", file: !3, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !21, identifier: "_ZTS6Unused") -!24 = !{!0} -!25 = !{i32 7, !"Dwarf Version", i32 5} -!26 = !{i32 2, !"Debug Info Version", i32 3} -!27 = !{i32 1, !"wchar_size", i32 4} -!28 = !{i32 7, !"uwtable", i32 1} -!29 = !{i32 7, !"frame-pointer", i32 2} -!30 = !{!"clang version 14.0.0"} -!31 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !3, file: !3, line: 16, type: !32, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !21) -!32 = !DISubroutineType(types: !33) -!33 = !{null} -!34 = !DILocalVariable(name: "local", scope: !31, file: !3, line: 16, type: !19) -!35 = !DILocation(line: 16, column: 23, scope: !31) -!36 = !DILocation(line: 16, column: 30, scope: !31) diff --git a/llvm/test/DebugInfo/X86/vla-global.ll b/llvm/test/DebugInfo/X86/vla-global.ll index 81d00ead44aa..b846df22c37e 100644 --- a/llvm/test/DebugInfo/X86/vla-global.ll +++ b/llvm/test/DebugInfo/X86/vla-global.ll @@ -1,7 +1,4 @@ ; RUN: llc -mtriple=x86_64-apple-darwin %s -o - -filetype=obj | llvm-dwarfdump - | FileCheck %s -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable ; CHECK: 0x00000[[G:.*]]: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("g") ; CHECK: DW_TAG_array_type diff --git a/llvm/test/DebugInfo/attr-btf_tag.ll b/llvm/test/DebugInfo/attr-btf_tag.ll index 5a603c551f22..ecbb1e721d5a 100644 --- a/llvm/test/DebugInfo/attr-btf_tag.ll +++ b/llvm/test/DebugInfo/attr-btf_tag.ll @@ -43,6 +43,37 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } !llvm.module.flags = !{!10, !11, !12, !13, !14} !llvm.ident = !{!15} +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g1", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true, annotations: !7) + +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name ("g1") +; CHECK: DW_TAG_LLVM_annotation +; CHECK-NEXT: DW_AT_name ("btf_decl_tag") +; CHECK-NEXT: DW_AT_const_value ("tag1") +; CHECK-EMPTY: +; CHECK-NEXT: DW_TAG_LLVM_annotation +; CHECK-NEXT: DW_AT_name ("btf_decl_tag") +; CHECK-NEXT: DW_AT_const_value ("tag2") +; CHECK-EMPTY: +; CHECK-NEXT: NULL + +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/tmp/home/yhs/work/tests/llvm/btf_tag") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{!8, !9} +!8 = !{!"btf_decl_tag", !"tag1"} +!9 = !{!"btf_decl_tag", !"tag2"} +!10 = !{i32 7, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 7, !"uwtable", i32 1} +!14 = !{i32 7, !"frame-pointer", i32 2} +!15 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)"} +!16 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 10, type: !17, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, 
retainedNodes: !4, annotations: !7) + ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("foo") ; CHECK: DW_TAG_formal_parameter @@ -66,17 +97,12 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } ; CHECK-EMPTY: ; CHECK-NEXT: NULL -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_name ("g1") -; CHECK: DW_TAG_LLVM_annotation -; CHECK-NEXT: DW_AT_name ("btf_decl_tag") -; CHECK-NEXT: DW_AT_const_value ("tag1") -; CHECK-EMPTY: -; CHECK-NEXT: DW_TAG_LLVM_annotation -; CHECK-NEXT: DW_AT_name ("btf_decl_tag") -; CHECK-NEXT: DW_AT_const_value ("tag2") -; CHECK-EMPTY: -; CHECK-NEXT: NULL +!17 = !DISubroutineType(types: !18) +!18 = !{!6, !19} +!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) +!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 4, size: 32, elements: !21, annotations: !7) +!21 = !{!22} +!22 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !20, file: !3, line: 5, baseType: !6, size: 32, annotations: !7) ; CHECK: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_name ("t1") @@ -102,29 +128,6 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } ; CHECK-EMPTY: ; CHECK-NEXT: NULL -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "g1", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true, annotations: !7) -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "t.c", directory: "/tmp/home/yhs/work/tests/llvm/btf_tag") -!4 = !{} -!5 = !{!0} -!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!7 = !{!8, !9} -!8 = !{!"btf_decl_tag", !"tag1"} -!9 = !{!"btf_decl_tag", !"tag2"} -!10 = !{i32 7, !"Dwarf Version", i32 4} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i32 1, !"wchar_size", i32 4} -!13 = !{i32 7, !"uwtable", i32 1} -!14 = !{i32 7, !"frame-pointer", i32 2} -!15 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)"} -!16 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 10, type: !17, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4, annotations: !7) -!17 = !DISubroutineType(types: !18) -!18 = !{!6, !19} -!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) -!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 4, size: 32, elements: !21, annotations: !7) -!21 = !{!22} -!22 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !20, file: !3, line: 5, baseType: !6, size: 32, annotations: !7) !23 = !DILocalVariable(name: "arg", arg: 1, scope: !16, file: !3, line: 10, type: !19, annotations: !7) !24 = !DILocation(line: 10, column: 48, scope: !16) !25 = !DILocation(line: 11, column: 10, scope: !16) diff --git a/llvm/test/MC/WebAssembly/debug-info.ll b/llvm/test/MC/WebAssembly/debug-info.ll index a234350546f7..d2a815f09716 100644 --- a/llvm/test/MC/WebAssembly/debug-info.ll +++ b/llvm/test/MC/WebAssembly/debug-info.ll @@ -154,20 +154,20 @@ ; CHECK-NEXT: 0x16 R_WASM_SECTION_OFFSET_I32 .debug_line 0 ; CHECK-NEXT: 0x1A R_WASM_SECTION_OFFSET_I32 .debug_str 62 ; CHECK-NEXT: 0x1E R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x27 
R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x32 R_WASM_GLOBAL_INDEX_I32 __stack_pointer -; CHECK-NEXT: 0x37 R_WASM_SECTION_OFFSET_I32 .debug_str 118 -; CHECK-NEXT: 0x3E R_WASM_SECTION_OFFSET_I32 .debug_str 105 -; CHECK-NEXT: 0x4A R_WASM_MEMORY_ADDR_I32 foo 0 -; CHECK-NEXT: 0x54 R_WASM_SECTION_OFFSET_I32 .debug_str 109 -; CHECK-NEXT: 0x5B R_WASM_SECTION_OFFSET_I32 .debug_str 113 -; CHECK-NEXT: 0x67 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x27 R_WASM_SECTION_OFFSET_I32 .debug_str 105 +; CHECK-NEXT: 0x33 R_WASM_MEMORY_ADDR_I32 foo 0 +; CHECK-NEXT: 0x3D R_WASM_SECTION_OFFSET_I32 .debug_str 109 +; CHECK-NEXT: 0x44 R_WASM_SECTION_OFFSET_I32 .debug_str 113 +; CHECK-NEXT: 0x50 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x5B R_WASM_FUNCTION_OFFSET_I32 f2 0 +; CHECK-NEXT: 0x66 R_WASM_GLOBAL_INDEX_I32 __stack_pointer +; CHECK-NEXT: 0x6B R_WASM_SECTION_OFFSET_I32 .debug_str 118 ; CHECK-NEXT: } ; CHECK-NEXT: Section (10) .debug_aranges { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 -; CHECK-NEXT: 0x10 R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x18 R_WASM_MEMORY_ADDR_I32 foo 0 -; CHECK-NEXT: 0x20 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x10 R_WASM_MEMORY_ADDR_I32 foo 0 +; CHECK-NEXT: 0x18 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x20 R_WASM_FUNCTION_OFFSET_I32 f2 0 ; CHECK-NEXT: } ; CHECK-NEXT: Section (12) .debug_pubnames { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 diff --git a/llvm/test/MC/WebAssembly/debug-info64.ll b/llvm/test/MC/WebAssembly/debug-info64.ll index af5b57511fcc..47b33aac104c 100644 --- a/llvm/test/MC/WebAssembly/debug-info64.ll +++ b/llvm/test/MC/WebAssembly/debug-info64.ll @@ -160,20 +160,20 @@ ; CHECK-NEXT: 0x16 R_WASM_SECTION_OFFSET_I32 .debug_line 0 ; CHECK-NEXT: 0x1A R_WASM_SECTION_OFFSET_I32 .debug_str 62 ; CHECK-NEXT: 0x1E R_WASM_FUNCTION_OFFSET_I64 f2 0 -; CHECK-NEXT: 0x2B R_WASM_FUNCTION_OFFSET_I64 f2 0 -; CHECK-NEXT: 0x3A R_WASM_GLOBAL_INDEX_I32 __stack_pointer -; CHECK-NEXT: 0x3F R_WASM_SECTION_OFFSET_I32 .debug_str 118 -; CHECK-NEXT: 0x46 R_WASM_SECTION_OFFSET_I32 .debug_str 105 -; CHECK-NEXT: 0x52 R_WASM_MEMORY_ADDR_I64 foo 0 -; CHECK-NEXT: 0x60 R_WASM_SECTION_OFFSET_I32 .debug_str 109 -; CHECK-NEXT: 0x67 R_WASM_SECTION_OFFSET_I32 .debug_str 113 -; CHECK-NEXT: 0x73 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x2B R_WASM_SECTION_OFFSET_I32 .debug_str 105 +; CHECK-NEXT: 0x37 R_WASM_MEMORY_ADDR_I64 foo 0 +; CHECK-NEXT: 0x45 R_WASM_SECTION_OFFSET_I32 .debug_str 109 +; CHECK-NEXT: 0x4C R_WASM_SECTION_OFFSET_I32 .debug_str 113 +; CHECK-NEXT: 0x58 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x67 R_WASM_FUNCTION_OFFSET_I64 f2 0 +; CHECK-NEXT: 0x76 R_WASM_GLOBAL_INDEX_I32 __stack_pointer +; CHECK-NEXT: 0x7B R_WASM_SECTION_OFFSET_I32 .debug_str 118 ; CHECK-NEXT: } ; CHECK-NEXT: Section (10) .debug_aranges { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 -; CHECK-NEXT: 0x10 R_WASM_FUNCTION_OFFSET_I64 f2 0 -; CHECK-NEXT: 0x20 R_WASM_MEMORY_ADDR_I64 foo 0 -; CHECK-NEXT: 0x30 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x10 R_WASM_MEMORY_ADDR_I64 foo 0 +; CHECK-NEXT: 0x20 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x30 R_WASM_FUNCTION_OFFSET_I64 f2 0 ; CHECK-NEXT: } ; CHECK-NEXT: Section (12) .debug_pubnames { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 diff --git a/llvm/test/MC/WebAssembly/dwarfdump.ll b/llvm/test/MC/WebAssembly/dwarfdump.ll index 8e11ce3f93ee..cb2e3e613af9 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump.ll @@ -15,46 +15,46 @@ ; CHECK-NEXT: 
DW_AT_low_pc (0x00000002) ; CHECK-NEXT: DW_AT_high_pc (0x00000004) -; CHECK: 0x00000026: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc (0x00000002) -; CHECK-NEXT: DW_AT_high_pc (0x00000004) -; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) -; CHECK-NEXT: DW_AT_name ("f2") -; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") -; CHECK-NEXT: DW_AT_decl_line (2) -; CHECK-NEXT: DW_AT_prototyped (true) -; CHECK-NEXT: DW_AT_external (true) - -; CHECK: 0x0000003d: DW_TAG_variable +; CHECK: 0x00000026: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("foo") -; CHECK-NEXT: DW_AT_type (0x0000004e "int *") +; CHECK-NEXT: DW_AT_type (0x00000037 "int *") ; CHECK-NEXT: DW_AT_external (true) ; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line (4) ; CHECK-NEXT: DW_AT_location (DW_OP_addr 0x0) -; CHECK: 0x0000004e: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type (0x00000053 "int") +; CHECK: 0x00000037: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x0000003c "int") -; CHECK: 0x00000053: DW_TAG_base_type +; CHECK: 0x0000003c: DW_TAG_base_type ; CHECK-NEXT: DW_AT_name ("int") ; CHECK-NEXT: DW_AT_encoding (DW_ATE_signed) ; CHECK-NEXT: DW_AT_byte_size (0x04) -; CHECK: 0x0000005a: DW_TAG_variable +; CHECK: 0x00000043: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("ptr2") -; CHECK-NEXT: DW_AT_type (0x0000006b "void (*)()") +; CHECK-NEXT: DW_AT_type (0x00000054 "void (*)()") ; CHECK-NEXT: DW_AT_external (true) ; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line (5) ; CHECK-NEXT: DW_AT_location (DW_OP_addr 0x4) -; CHECK: 0x0000006b: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type (0x00000070 "void ()") +; CHECK: 0x00000054: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x00000059 "void ()") -; CHECK: 0x00000070: DW_TAG_subroutine_type +; CHECK: 0x00000059: DW_TAG_subroutine_type ; CHECK-NEXT: DW_AT_prototyped (true) +; CHECK: 0x0000005a: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_low_pc (0x00000002) +; CHECK-NEXT: DW_AT_high_pc (0x00000004) +; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name ("f2") +; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") +; CHECK-NEXT: DW_AT_decl_line (2) +; CHECK-NEXT: DW_AT_prototyped (true) +; CHECK-NEXT: DW_AT_external (true) + ; CHECK: 0x00000071: NULL @@ -66,48 +66,48 @@ ; SPLIT-NEXT: DW_AT_language (DW_LANG_C99) ; SPLIT-NEXT: DW_AT_name ("test.c") ; SPLIT-NEXT: DW_AT_GNU_dwo_name ("{{.*}}dwarfdump.ll.tmp.dwo") -; SPLIT-NEXT: DW_AT_GNU_dwo_id (0x0642bb5dada25ca6) +; SPLIT-NEXT: DW_AT_GNU_dwo_id (0xad3151f12153fa17) -; SPLIT: 0x00000019: DW_TAG_subprogram -; SPLIT-NEXT: DW_AT_low_pc (indexed (00000000) address = ) -; SPLIT-NEXT: DW_AT_high_pc (0x00000002) -; SPLIT-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) -; SPLIT-NEXT: DW_AT_name ("f2") -; SPLIT-NEXT: DW_AT_decl_file (0x01) -; SPLIT-NEXT: DW_AT_decl_line (2) -; SPLIT-NEXT: DW_AT_prototyped (true) -; SPLIT-NEXT: DW_AT_external (true) - -; SPLIT: 0x0000002a: DW_TAG_variable +; SPLIT: 0x00000019: DW_TAG_variable ; SPLIT-NEXT: DW_AT_name ("foo") -; SPLIT-NEXT: DW_AT_type (0x00000035 "int *") +; SPLIT-NEXT: DW_AT_type (0x00000024 "int *") ; SPLIT-NEXT: DW_AT_external (true) ; SPLIT-NEXT: DW_AT_decl_file (0x01) ; SPLIT-NEXT: DW_AT_decl_line (4) -; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x1) +; SPLIT-NEXT: 
DW_AT_location (DW_OP_GNU_addr_index 0x0) -; SPLIT: 0x00000035: DW_TAG_pointer_type -; SPLIT-NEXT: DW_AT_type (0x0000003a "int") +; SPLIT: 0x00000024: DW_TAG_pointer_type +; SPLIT-NEXT: DW_AT_type (0x00000029 "int") -; SPLIT: 0x0000003a: DW_TAG_base_type +; SPLIT: 0x00000029: DW_TAG_base_type ; SPLIT-NEXT: DW_AT_name ("int") ; SPLIT-NEXT: DW_AT_encoding (DW_ATE_signed) ; SPLIT-NEXT: DW_AT_byte_size (0x04) -; SPLIT: 0x0000003e: DW_TAG_variable +; SPLIT: 0x0000002d: DW_TAG_variable ; SPLIT-NEXT: DW_AT_name ("ptr2") -; SPLIT-NEXT: DW_AT_type (0x00000049 "void (*)()") +; SPLIT-NEXT: DW_AT_type (0x00000038 "void (*)()") ; SPLIT-NEXT: DW_AT_external (true) ; SPLIT-NEXT: DW_AT_decl_file (0x01) ; SPLIT-NEXT: DW_AT_decl_line (5) -; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x2) +; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x1) -; SPLIT: 0x00000049: DW_TAG_pointer_type -; SPLIT-NEXT: DW_AT_type (0x0000004e "void ()") +; SPLIT: 0x00000038: DW_TAG_pointer_type +; SPLIT-NEXT: DW_AT_type (0x0000003d "void ()") -; SPLIT: 0x0000004e: DW_TAG_subroutine_type +; SPLIT: 0x0000003d: DW_TAG_subroutine_type ; SPLIT-NEXT: DW_AT_prototyped (true) +; SPLIT: 0x0000003e: DW_TAG_subprogram +; SPLIT-NEXT: DW_AT_low_pc (indexed (00000002) address = ) +; SPLIT-NEXT: DW_AT_high_pc (0x00000002) +; SPLIT-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) +; SPLIT-NEXT: DW_AT_name ("f2") +; SPLIT-NEXT: DW_AT_decl_file (0x01) +; SPLIT-NEXT: DW_AT_decl_line (2) +; SPLIT-NEXT: DW_AT_prototyped (true) +; SPLIT-NEXT: DW_AT_external (true) + ; SPLIT: 0x0000004f: NULL diff --git a/llvm/test/MC/WebAssembly/dwarfdump64.ll b/llvm/test/MC/WebAssembly/dwarfdump64.ll index 199e1f55931f..603f610c4dff 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump64.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump64.ll @@ -14,46 +14,46 @@ ; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) ; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) -; CHECK: 0x0000002a: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) -; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) -; CHECK-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("f2") -; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") -; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) -; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) -; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) - -; CHECK: 0x00000045: DW_TAG_variable +; CHECK: 0x0000002a: DW_TAG_variable ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("foo") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000005a "int *") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000003f "int *") ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (4) ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) -; CHECK: 0x0000005a: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000005f "int") +; CHECK: 0x0000003f: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000044 "int") -; CHECK: 0x0000005f: DW_TAG_base_type +; CHECK: 0x00000044: DW_TAG_base_type ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("int") ; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (DW_ATE_signed) ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) -; CHECK: 0x00000066: 
DW_TAG_variable +; CHECK: 0x0000004b: DW_TAG_variable ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("ptr2") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000007b "void (*)()") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000060 "void (*)()") ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (5) ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x8) -; CHECK: 0x0000007b: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000080 "void ()") +; CHECK: 0x00000060: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000065 "void ()") -; CHECK: 0x00000080: DW_TAG_subroutine_type +; CHECK: 0x00000065: DW_TAG_subroutine_type ; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) +; CHECK: 0x00000066: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) +; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) +; CHECK-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("f2") +; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") +; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) +; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) +; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) + ; CHECK: 0x00000081: NULL target triple = "wasm64-unknown-unknown" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll index 1c9617ab0d44..576c945eec3c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { @@ -37,7 +37,6 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x ; XOP-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] ; XOP-NEXT: ret <4 x i32> [[SH]] ; - %splat1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> 
zeroinitializer %splat2 = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> zeroinitializer %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2 @@ -78,7 +77,6 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 ; XOP-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]] ; XOP-NEXT: ret <16 x i16> [[SH]] ; - %splat1 = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> zeroinitializer %splat2 = shufflevector <16 x i16> %y, <16 x i16> poison, <16 x i32> zeroinitializer %sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2 @@ -87,14 +85,40 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 } define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { -; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] -; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] -; ALL-NEXT: ret <32 x i8> [[SH]] +; AVX1-LABEL: @vector_variable_shift_right_v32i8( +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX1-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX2-LABEL: @vector_variable_shift_right_v32i8( +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX2-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX512BW-LABEL: @vector_variable_shift_right_v32i8( +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX512BW-NEXT: ret <32 x i8> [[TMP3]] +; +; XOP-LABEL: @vector_variable_shift_right_v32i8( +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> 
[[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] +; XOP-NEXT: ret <32 x i8> [[SH]] ; - %splat1 = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> zeroinitializer %splat2 = shufflevector <32 x i8> %y, <32 x i8> poison, <32 x i32> zeroinitializer %sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2 @@ -233,7 +257,6 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; XOP: exit: ; XOP-NEXT: ret void ; - entry: %cmp16 = icmp sgt i32 %count, 0 %wide.trip.count = zext i32 %count to i64 @@ -377,7 +400,6 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; XOP: exit: ; XOP-NEXT: ret void ; - entry: %i0 = insertelement <8 x i32> poison, i32 %rot0, i32 0 %s0 = shufflevector <8 x i32> %i0, <8 x i32> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll index 7d685b78f310..d0ceec32b90c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { @@ -85,12 +85,39 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 } define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { -; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] -; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] -; ALL-NEXT: ret <32 x i8> [[SH]] +; AVX1-LABEL: @vector_variable_shift_right_v32i8( +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SEL:%.*]] = 
select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX1-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX2-LABEL: @vector_variable_shift_right_v32i8( +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX2-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX512BW-LABEL: @vector_variable_shift_right_v32i8( +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX512BW-NEXT: ret <32 x i8> [[TMP3]] +; +; XOP-LABEL: @vector_variable_shift_right_v32i8( +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] +; XOP-NEXT: ret <32 x i8> [[SH]] ; %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll index be3723b7d554..7fd50c4d80d6 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll @@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { -; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if_true: -; CHECK-NEXT: ret <16 x i8> [[MASK]] -; CHECK: if_false: -; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] -; CHECK-NEXT: ret <16 x i8> [[RES]] +; CHECK-SSE2-LABEL: @test_8bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]] +; 
CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-XOP-LABEL: @test_8bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-AVX-LABEL: @test_8bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX-NEXT: ret <16 x i8> [[RES]] ; %mask = shufflevector <16 x i8> %tmp, <16 x i8> poison, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll index a443e7100062..97e00023aa02 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { -; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if_true: -; CHECK-NEXT: ret <16 x i8> [[MASK]] -; CHECK: if_false: -; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] -; CHECK-NEXT: ret <16 x i8> [[RES]] +; CHECK-SSE2-LABEL: @test_8bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-XOP-LABEL: @test_8bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-AVX-LABEL: @test_8bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <16 x i8> 
[[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX-NEXT: ret <16 x i8> [[RES]] ; %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index 7dfa1fcc3a77..ccc9b38759c4 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -109,10 +109,16 @@ exit: declare void @baz() -; Verify that having the switch block as a determinator is handled correctly. -define i32 @main() { -; CHECK: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ] -; CHECK-NEXT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ] +; Do not jump-thread those paths where the determinator basic block does not +; precede the basic block that defines the switch condition. +; +; Otherwise, it is possible that the state defined in the determinator block +; defines the state for the next iteration of the loop, rather than for the +; current one. +define i32 @wrong_bb_order() { +; CHECK-LABEL: DFA Jump threading: wrong_bb_order +; CHECK-NOT: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ] +; CHECK-NOT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ] bb: %i = alloca [420 x i8], align 1 %i2 = getelementptr inbounds [420 x i8], [420 x i8]* %i, i64 0, i64 390 diff --git a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll index 27400cd4ed16..45c19c3588b0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll @@ -1,34 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test to make sure llvm.invariant.start calls are not treated as clobbers. ; RUN: opt < %s -basic-aa -dse -S | FileCheck %s declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly -; We cannot remove the store 1 to %p. -; FIXME: By the semantics of invariant.start, the store 3 to p is unreachable. +; We could remove either store here. The first store is dead in the +; conventional sense, because there is a later killing store. The second store +; is undefined behavior by the semantics of invariant.start, and as such +; unreachable. define void @test(i8 *%p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: store i8 3, i8* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; store i8 1, i8* %p, align 4 %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p) store i8 3, i8* %p, align 4 ret void -; CHECK-LABEL: @test( -; CHECK-NEXT: store i8 1, i8* %p, align 4 -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p) -; CHECK-NEXT: store i8 3, i8* %p, align 4 -; CHECK-NEXT: ret void } ; FIXME: We should be able to remove the first store to p, even though p and q ; may alias. 
define void @test2(i8* %p, i8* %q) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: store i8 1, i8* [[P:%.*]], align 4 +; CHECK-NEXT: store i8 2, i8* [[Q:%.*]], align 4 +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 1, i8* [[Q]]) +; CHECK-NEXT: store i8 3, i8* [[P]], align 4 +; CHECK-NEXT: ret void +; store i8 1, i8* %p, align 4 store i8 2, i8* %q, align 4 %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q) store i8 3, i8* %p, align 4 ret void -; CHECK-LABEL: @test2( -; CHECK-NEXT: store i8 1, i8* %p, align 4 -; CHECK-NEXT: store i8 2, i8* %q, align 4 -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q) -; CHECK-NEXT: store i8 3, i8* %p, align 4 -; CHECK-NEXT: ret void } diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll index 361b243f5121..cee2b076e8b5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll @@ -8,6 +8,7 @@ declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind declare void @llvm.init.trampoline(i8*, i8*, i8*) +declare void @llvm.matrix.column.major.store(<6 x float>, float*, i64, i1, i32, i32) define void @test1(i32* %Q, i32* %P) { ; CHECK-LABEL: @test1( @@ -193,6 +194,17 @@ define void @test11() { ret void } +; Specialized store intrinsics should be removed if dead. +define void @test_matrix_store(i64 %stride) { +; CHECK-LABEL: @test_matrix_store( +; CHECK-NEXT: ret void +; + %a = alloca [6 x float] + %cast = bitcast [6 x float]* %a to float* + call void @llvm.matrix.column.major.store(<6 x float> zeroinitializer, float* %cast, i64 %stride, i1 false, i32 3, i32 2) + ret void +} + ; %P doesn't escape, the DEAD instructions should be removed. 
declare void @test13f() define i32* @test13() { @@ -312,7 +324,7 @@ define noalias i8* @test23() nounwind uwtable ssp { ; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 ; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) [[ATTR3:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret i8* [[CALL]] ; %x = alloca [2 x i8], align 1 @@ -350,7 +362,7 @@ define i8* @test25(i8* %p) nounwind { ; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4 ; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* [[P_4]], align 1 ; CHECK-NEXT: store i8 0, i8* [[P_4]], align 1 -; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) [[ATTR6:#.*]] +; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #[[ATTR10:[0-9]+]] ; CHECK-NEXT: store i8 [[TMP]], i8* [[P_4]], align 1 ; CHECK-NEXT: ret i8* [[Q]] ; diff --git a/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll b/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll new file mode 100644 index 000000000000..1e98fd7404a3 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=dse -S | FileCheck %s + +target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S32" + +%struct.ilist = type { i32, %struct.ilist* } + +; There is no dead store in this test. Make sure no store is deleted by DSE. +; Test case related to bug report PR52774. + +define %struct.ilist* @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LIST_NEXT:%.*]] = phi %struct.ilist* [ null, [[TMP0]] ], [ [[LIST_NEW_ILIST_PTR:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LIST_NEW_I8_PTR:%.*]] = tail call align 8 dereferenceable_or_null(8) i8* @malloc(i32 8) +; CHECK-NEXT: [[LIST_NEW_ILIST_PTR]] = bitcast i8* [[LIST_NEW_I8_PTR]] to %struct.ilist* +; CHECK-NEXT: [[GEP_NEW_VALUE:%.*]] = getelementptr inbounds [[STRUCT_ILIST:%.*]], %struct.ilist* [[LIST_NEW_ILIST_PTR]], i32 0, i32 0 +; CHECK-NEXT: store i32 42, i32* [[GEP_NEW_VALUE]], align 8 +; CHECK-NEXT: [[GEP_NEW_NEXT:%.*]] = getelementptr inbounds [[STRUCT_ILIST]], %struct.ilist* [[LIST_NEW_ILIST_PTR]], i32 0, i32 1 +; CHECK-NEXT: store %struct.ilist* [[LIST_NEXT]], %struct.ilist** [[GEP_NEW_NEXT]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], 10 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[LIST_LAST_ILIST_PTR:%.*]] = bitcast i8* [[LIST_NEW_I8_PTR]] to %struct.ilist* +; CHECK-NEXT: [[GEP_LIST_LAST_NEXT:%.*]] = getelementptr inbounds [[STRUCT_ILIST]], %struct.ilist* [[LIST_LAST_ILIST_PTR]], i32 0, i32 1 +; CHECK-NEXT: store %struct.ilist* null, %struct.ilist** [[GEP_LIST_LAST_NEXT]], align 4 +; CHECK-NEXT: [[GEP_LIST_NEXT_NEXT:%.*]] = getelementptr inbounds [[STRUCT_ILIST]], %struct.ilist* [[LIST_NEXT]], i32 0, i32 1 +; CHECK-NEXT: [[LOADED_PTR:%.*]] = load %struct.ilist*, %struct.ilist** [[GEP_LIST_NEXT_NEXT]], align 4 +; CHECK-NEXT: ret %struct.ilist* [[LOADED_PTR]] +; + br label %loop + +loop: + %iv = phi i32 [ 0, %0 ], [ %iv.next, %loop ] + %list.next = phi %struct.ilist* [ null, %0 ], [ %list.new.ilist.ptr, %loop ] + 
%list.new.i8.ptr = tail call align 8 dereferenceable_or_null(8) i8* @malloc(i32 8) + %list.new.ilist.ptr = bitcast i8* %list.new.i8.ptr to %struct.ilist* + %gep.new.value = getelementptr inbounds %struct.ilist, %struct.ilist* %list.new.ilist.ptr, i32 0, i32 0 + store i32 42, i32* %gep.new.value, align 8 + %gep.new.next = getelementptr inbounds %struct.ilist, %struct.ilist* %list.new.ilist.ptr, i32 0, i32 1 + store %struct.ilist* %list.next, %struct.ilist** %gep.new.next, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %cond = icmp eq i32 %iv.next, 10 + br i1 %cond, label %exit, label %loop + +exit: + %list.last.ilist.ptr = bitcast i8* %list.new.i8.ptr to %struct.ilist* + %gep.list.last.next = getelementptr inbounds %struct.ilist, %struct.ilist* %list.last.ilist.ptr, i32 0, i32 1 + store %struct.ilist* null, %struct.ilist** %gep.list.last.next, align 4 + %gep.list.next.next = getelementptr inbounds %struct.ilist, %struct.ilist* %list.next, i32 0, i32 1 + %loaded_ptr = load %struct.ilist*, %struct.ilist** %gep.list.next.next, align 4 + ret %struct.ilist* %loaded_ptr ; use loaded pointer +} + +; Function Attrs: inaccessiblememonly nounwind +declare noalias noundef align 8 i8* @malloc(i32 noundef) local_unnamed_addr #0 + +attributes #0 = { inaccessiblememonly nounwind} diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll index f3943326fc6f..e5630cca1aba 100644 --- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll @@ -650,3 +650,27 @@ define i8 @memset_optimized_access(i8* noalias %dst, i8* noalias %src) { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i1 false) ret i8 %l } + +; The @use() call is a later non-removable store, but should not affect the +; removal of the store in the if block. +define void @later_non_removable_store(i1 %c, i8* %p) { +; CHECK-LABEL: @later_non_removable_store( +; CHECK-NEXT: store i8 1, i8* [[P:%.*]], align 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: call void @use(i8* [[P]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: ret void +; + store i8 1, i8* %p + br i1 %c, label %if, label %exit + +if: + store i8 1, i8* %p + br label %exit + +exit: + call void @use(i8* %p) argmemonly + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll index 13fd0e2ded0c..2b3838b00b3e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll @@ -7,6 +7,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) declare void @unknown() declare void @f(i8*) declare void @f2(i8*, i8*) +declare i8* @f3(i8*, i8*) ; Basic case for DSEing a trivially dead writing call define void @test_dead() { @@ -205,22 +206,38 @@ define void @test_unreleated_read() { ret void } -; But that removing a capture of an unrelated pointer isn't okay. -define void @test_neg_unreleated_capture() { -; CHECK-LABEL: @test_neg_unreleated_capture( +; Removing a capture is also okay. The capture can only be in the return value +; (which is unused) or written into the dead out parameter. 
+define void @test_unrelated_capture() { +; CHECK-LABEL: @test_unrelated_capture( +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %a2 = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + %bitcast2 = bitcast i32* %a2 to i8* + call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + ret void +} + +; Cannot remove call, as %bitcast2 is captured via the return value. +define i8 @test_neg_unrelated_capture_used_via_return() { +; CHECK-LABEL: @test_neg_unrelated_capture_used_via_return( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8* -; CHECK-NEXT: call void @f2(i8* nocapture writeonly [[BITCAST]], i8* readonly [[BITCAST2]]) #[[ATTR1]] -; CHECK-NEXT: ret void +; CHECK-NEXT: [[CAPTURE:%.*]] = call i8* @f3(i8* nocapture writeonly [[BITCAST]], i8* readonly [[BITCAST2]]) #[[ATTR1]] +; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[CAPTURE]], align 1 +; CHECK-NEXT: ret i8 [[V]] ; %a = alloca i32, align 4 %a2 = alloca i32, align 4 %bitcast = bitcast i32* %a to i8* %bitcast2 = bitcast i32* %a2 to i8* - call void @f2(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn - ret void + %capture = call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + %v = load i8, i8* %capture + ret i8 %v } ; As long as the result is unused, we can even remove reads of the alloca diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 3ca0bc438227..8ea47cf43070 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -146,3 +146,42 @@ define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) { store i8 0, i8* %addr.ld ret void } + + +; CHECK: define void @fptr_test1a(i8* nocapture readnone %p, void (i8*)* nocapture readonly %f) +define void @fptr_test1a(i8* %p, void (i8*)* %f) { + call void %f(i8* nocapture readnone %p) + ret void +} + +; CHECK: define void @fptr_test1b(i8* %p, void (i8*)* nocapture readonly %f) +define void @fptr_test1b(i8* %p, void (i8*)* %f) { + ; Can't infer readnone here because call might capture %p + call void %f(i8* readnone %p) + ret void +} + +; CHECK: define void @fptr_test1c(i8* readnone %p, void (i8*)* nocapture readonly %f) +define void @fptr_test1c(i8* %p, void (i8*)* %f) { + call void %f(i8* readnone %p) readonly + ret void +} + +; CHECK: define void @fptr_test2a(i8* nocapture readonly %p, void (i8*)* nocapture readonly %f) +define void @fptr_test2a(i8* %p, void (i8*)* %f) { + call void %f(i8* nocapture readonly %p) + ret void +} + +; CHECK: define void @fptr_test2b(i8* %p, void (i8*)* nocapture readonly %f) +define void @fptr_test2b(i8* %p, void (i8*)* %f) { + ; Can't infer readonly here because call might capture %p + call void %f(i8* readonly %p) + ret void +} + +; CHECK: define void @fptr_test2c(i8* readonly %p, void (i8*)* nocapture readonly %f) +define void @fptr_test2c(i8* %p, void (i8*)* %f) { + call void %f(i8* readonly %p) readonly + ret void +} diff --git a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll new file mode 100644 index 000000000000..1dc91d105ab3 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py +; RUN: opt -S < %s -indvars | FileCheck %s +; RUN: opt -S < %s -passes=indvars | FileCheck %s + +declare i1 @cond() +declare void @exit(i32 %code) + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. +define void @test_01(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp slt i32 [[IV]], [[X_SHIFTED]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp slt i32 %iv, %x.shifted + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp slt i32 %iv, %x + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_02(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp sgt i32 [[X_SHIFTED]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp sgt i32 %x.shifted, %iv + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp sgt i32 %x, %iv + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_03(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp ult i32 [[IV]], [[X_SHIFTED]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp ult i32 %iv, %x.shifted + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp ult i32 %iv, %x + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_04(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp ugt i32 [[X_SHIFTED]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp ugt i32 %x.shifted, %iv + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp ugt i32 %x, %iv + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +!0 = !{i32 0, i32 2147483647} diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll index a410a2a071aa..48c2e0a301a9 100644 --- a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll +++ b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll @@ -8,7 +8,7 @@ ; ; Generate mock model ; RUN: rm -rf %t -; RUN: %python %S/../../../../lib/Analysis/models/generate_mock_model.py %S/../../../../lib/Analysis/models/inlining/config.py %t +; RUN: %python %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t ; ; When the bounds are very wide ("no bounds"), all inlinings happen. 
; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%t -training-log=- -tfutils-text-log -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS diff --git a/llvm/test/Transforms/Inline/ML/development-training-log.ll b/llvm/test/Transforms/Inline/ML/development-training-log.ll index c571c02b3fd5..7d3b59207582 100644 --- a/llvm/test/Transforms/Inline/ML/development-training-log.ll +++ b/llvm/test/Transforms/Inline/ML/development-training-log.ll @@ -2,7 +2,7 @@ ; REQUIRES: have_tf_api ; Generate mock model ; RUN: rm -rf %t -; RUN: %python %S/../../../../lib/Analysis/models/generate_mock_model.py %S/../../../../lib/Analysis/models/inlining/config.py %t +; RUN: %python %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t ; ; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -tfutils-text-log -ml-inliner-model-under-training=%t -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -S < %s | FileCheck %s ; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -tfutils-text-log -ml-inliner-model-under-training=%t -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-output-spec-override=%S/Inputs/test_output_spec.json -S < %s | FileCheck %s --check-prefixes=EXTRA-OUTPUTS,CHECK diff --git a/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll index b755cf015a0d..d902b9e4c778 100644 --- a/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll +++ b/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll @@ -7,6 +7,6 @@ ; ; REQUIRES: have_tf_api ; RUN: rm -rf %t && mkdir %t -; RUN: %python %S/../../../../lib/Analysis/models/generate_mock_model.py %S/../../../../lib/Analysis/models/inlining/config.py %t +; RUN: %python %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=development -ml-inliner-model-under-training=%t -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll index c1c45b8e5ab3..c8987e11a723 100644 --- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll +++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll @@ -11,11 +11,9 @@ declare void @use(i8*, i32**) define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { ; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8( ; CHECK-NEXT: .master: -; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1, addrspace(5) -; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32* addrspace(5)* [[TMP0]] to i8 addrspace(5)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(5)* [[DOTSUB]] to i8* -; CHECK-NEXT: [[A_ON_STACK:%.*]] = addrspacecast i32* addrspace(5)* [[TMP0]] to i32** -; CHECK-NEXT: call void @use(i8* [[TMP1]], i32** [[A_ON_STACK]]) +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1 +; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32** [[TMP0]] to i8* +; CHECK-NEXT: call void @use(i8* 
[[DOTSUB]], i32** [[TMP0]]) ; CHECK-NEXT: ret void ; .master: @@ -25,3 +23,22 @@ define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { call void @use(i8* %0, i32** %a_on_stack) ret void } + +%struct.widget = type { [8 x i8] } + +define void @spam(i64* %arg1) { +; CHECK-LABEL: @spam( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [0 x [30 x %struct.widget]], [0 x [30 x %struct.widget]]* [[ALLOCA1]], i64 0, i64 0, i64 0 +; CHECK-NEXT: call void @zot(%struct.widget* [[GEP]]) +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca [30 x %struct.widget], i32 0, align 16 + %gep = getelementptr inbounds [30 x %struct.widget], [30 x %struct.widget]* %alloca, i64 0, i64 0 + call void @zot(%struct.widget* %gep) + ret void +} + +declare hidden void @zot(%struct.widget*) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index e1d411c7a5a8..791d4d9b2da8 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -1,33 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instcombine -S < %s | FileCheck %s ; Function Attrs: noinline uwtable -define dso_local i32 @foo(i32* nocapture writeonly %arg) local_unnamed_addr #0 { +define i32 @foo(i32* nocapture writeonly %arg) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[VAR:%.*]] = call i32 @baz() +; CHECK-NEXT: store i32 [[VAR]], i32* [[ARG:%.*]], align 4 +; CHECK-NEXT: [[VAR1:%.*]] = call i32 @baz() +; CHECK-NEXT: ret i32 [[VAR1]] +; bb: - %tmp = call i32 @baz() - store i32 %tmp, i32* %arg, align 4, !tbaa !2 - %tmp1 = call i32 @baz() - ret i32 %tmp1 + %var = call i32 @baz() + store i32 %var, i32* %arg, align 4 + %var1 = call i32 @baz() + ret i32 %var1 } -declare dso_local i32 @baz() local_unnamed_addr +declare i32 @baz() ; Function Attrs: uwtable ; This is an equivalent IR for a c-style example with a large function (foo) ; with out-params which are unused in the caller(test8). Note that foo is ; marked noinline to prevent IPO transforms. 
; int foo(); -; +; ; extern int foo(int *out) __attribute__((noinline)); ; int foo(int *out) { ; *out = baz(); ; return baz(); ; } -; +; ; int test() { -; +; ; int notdead; ; if (foo(¬dead)) ; return 0; -; +; ; int dead; ; int tmp = foo(&dead); ; if (notdead) @@ -35,59 +43,167 @@ declare dso_local i32 @baz() local_unnamed_addr ; return bar(); ; } -; TODO: We should be able to sink the second call @foo at bb5 down to bb_crit_edge -define dso_local i32 @test() local_unnamed_addr #2 { -; CHECK-LABEL: test -; CHECK: bb5: -; CHECK: %tmp7 = call i32 @foo(i32* nonnull writeonly %tmp1) -; CHECK-NEXT: br i1 %tmp9, label %bb10, label %bb_crit_edge +; TODO: We should be able to sink the second call @foo at bb5 down to bb_crit_edge +define i32 @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR1:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32* [[VAR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[VAR2]]) +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @foo(i32* nonnull writeonly [[VAR]]) +; CHECK-NEXT: [[VAR4:%.*]] = icmp eq i32 [[VAR3]], 0 +; CHECK-NEXT: br i1 [[VAR4]], label [[BB5:%.*]], label [[BB14:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[VAR6:%.*]] = bitcast i32* [[VAR1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[VAR6]]) +; CHECK-NEXT: [[VAR8:%.*]] = load i32, i32* [[VAR]], align 4 +; CHECK-NEXT: [[VAR9:%.*]] = icmp eq i32 [[VAR8]], 0 +; CHECK-NEXT: [[VAR7:%.*]] = call i32 @foo(i32* nonnull writeonly [[VAR1]]) +; CHECK-NEXT: br i1 [[VAR9]], label [[BB10:%.*]], label [[BB_CRIT_EDGE:%.*]] +; CHECK: bb10: +; CHECK-NEXT: [[VAR11:%.*]] = call i32 @bar() +; CHECK-NEXT: br label [[BB12:%.*]] +; CHECK: bb_crit_edge: +; CHECK-NEXT: br label [[BB12]] +; CHECK: bb12: +; CHECK-NEXT: [[VAR13:%.*]] = phi i32 [ [[VAR11]], [[BB10]] ], [ [[VAR7]], [[BB_CRIT_EDGE]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[VAR6]]) +; CHECK-NEXT: br label [[BB14]] +; CHECK: bb14: +; CHECK-NEXT: [[VAR15:%.*]] = phi i32 [ [[VAR13]], [[BB12]] ], [ 0, [[BB:%.*]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[VAR2]]) +; CHECK-NEXT: ret i32 [[VAR15]] +; bb: - %tmp = alloca i32, align 4 - %tmp1 = alloca i32, align 4 - %tmp2 = bitcast i32* %tmp to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2) #4 - %tmp3 = call i32 @foo(i32* nonnull writeonly %tmp) - %tmp4 = icmp eq i32 %tmp3, 0 - br i1 %tmp4, label %bb5, label %bb14 + %var = alloca i32, align 4 + %var1 = alloca i32, align 4 + %var2 = bitcast i32* %var to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %var2) #4 + %var3 = call i32 @foo(i32* nonnull writeonly %var) + %var4 = icmp eq i32 %var3, 0 + br i1 %var4, label %bb5, label %bb14 bb5: ; preds = %bb - %tmp6 = bitcast i32* %tmp1 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp6) #4 - %tmp8 = load i32, i32* %tmp, align 4, !tbaa !2 - %tmp9 = icmp eq i32 %tmp8, 0 - %tmp7 = call i32 @foo(i32* nonnull writeonly %tmp1) - br i1 %tmp9, label %bb10, label %bb_crit_edge + %var6 = bitcast i32* %var1 to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %var6) #4 + %var8 = load i32, i32* %var, align 4 + %var9 = icmp eq i32 %var8, 0 + %var7 = call i32 @foo(i32* nonnull writeonly %var1) + br i1 %var9, label %bb10, label %bb_crit_edge bb10: ; preds = %bb5 - %tmp11 = call i32 @bar() + %var11 = call i32 @bar() br label %bb12 bb_crit_edge: br label %bb12 bb12: ; preds = %bb10, %bb5 - %tmp13 = 
phi i32 [ %tmp11, %bb10 ], [ %tmp7, %bb_crit_edge ] - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp6) #4 + %var13 = phi i32 [ %var11, %bb10 ], [ %var7, %bb_crit_edge ] + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %var6) #4 br label %bb14 bb14: ; preds = %bb12, %bb - %tmp15 = phi i32 [ %tmp13, %bb12 ], [ 0, %bb ] - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2) - ret i32 %tmp15 + %var15 = phi i32 [ %var13, %bb12 ], [ 0, %bb ] + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %var2) + ret i32 %var15 +} + +declare i32 @unknown(i32* %dest) + +define i32 @sink_to_use(i1 %c) { +; CHECK-LABEL: @sink_to_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @neg_infinite_loop(i1 %c) { +; CHECK-LABEL: @neg_infinite_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @neg_throw(i1 %c) { +; CHECK-LABEL: @neg_throw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) argmemonly willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @neg_unknown_write(i1 %c) { +; CHECK-LABEL: @neg_unknown_write( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 } declare i32 @bar() -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3 - -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3 - -attributes #0 = { noinline uwtable argmemonly nounwind willreturn writeonly } -attributes #3 = { argmemonly nofree nosync nounwind willreturn } -!0 = !{i32 1, 
!"wchar_size", i32 4} -!1 = !{!"clang version 10.0.0-4ubuntu1 "} -!2 = !{!3, !3, i64 0} -!3 = !{!"int", !4, i64 0} -!4 = !{!"omnipotent char", !5, i64 0} -!5 = !{!"Simple C++ TBAA"} +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) diff --git a/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll b/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll index f2c009ac3796..6e2433336d1a 100644 --- a/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll +++ b/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll @@ -7,6 +7,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) declare void @unknown() declare void @f(i8*) declare void @f2(i8*, i8*) +declare i8* @f3(i8*, i8*) ; Basic case for DSEing a trivially dead writing call define void @test_dead() { @@ -192,22 +193,38 @@ define void @test_unreleated_read() { ret void } -; But that removing a capture of an unrelated pointer isn't okay. -define void @test_neg_unreleated_capture() { -; CHECK-LABEL: @test_neg_unreleated_capture( +; Removing a capture is also okay. The capture can only be in the return value +; (which is unused) or written into the dead out parameter. +define void @test_unrelated_capture() { +; CHECK-LABEL: @test_unrelated_capture( +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %a2 = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + %bitcast2 = bitcast i32* %a2 to i8* + call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + ret void +} + +; Cannot remove call, as %bitcast2 is captured via the return value. +define i8 @test_neg_unrelated_capture_used_via_return() { +; CHECK-LABEL: @test_neg_unrelated_capture_used_via_return( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8* -; CHECK-NEXT: call void @f2(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull readonly [[BITCAST2]]) #[[ATTR1]] -; CHECK-NEXT: ret void +; CHECK-NEXT: [[CAPTURE:%.*]] = call i8* @f3(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull readonly [[BITCAST2]]) #[[ATTR1]] +; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[CAPTURE]], align 1 +; CHECK-NEXT: ret i8 [[V]] ; %a = alloca i32, align 4 %a2 = alloca i32, align 4 %bitcast = bitcast i32* %a to i8* %bitcast2 = bitcast i32* %a2 to i8* - call void @f2(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn - ret void + %capture = call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + %v = load i8, i8* %capture + ret i8 %v } ; As long as the result is unused, we can even remove reads of the alloca diff --git a/llvm/test/Transforms/InstSimplify/and.ll b/llvm/test/Transforms/InstSimplify/and.ll index 73b439c381d3..4900d48c5ffe 100644 --- a/llvm/test/Transforms/InstSimplify/and.ll +++ b/llvm/test/Transforms/InstSimplify/and.ll @@ -132,15 +132,11 @@ define i8 @or_or_not_no_common_op(i8 %x, i8 %y, i8 %z) { ret i8 %and } -; ((A | B) ^ A ) & ((A | B) ^ B) +; ((X | Y) ^ X ) & ((X | Y) ^ Y) --> 0 define i8 @or_xor(i8 %x, i8 %y) { ; CHECK-LABEL: @or_xor( -; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[OR]], [[X]] -; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[OR]], [[Y]] -; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], [[XOR2]] -; CHECK-NEXT: ret i8 [[AND]] +; CHECK-NEXT: ret i8 0 ; 
%or = or i8 %x, %y %xor1 = xor i8 %or, %x @@ -149,15 +145,11 @@ define i8 @or_xor(i8 %x, i8 %y) { ret i8 %and } -; ((A | B) ^ B ) & ((A | B) ^ A) +; ((X | Y) ^ Y ) & ((X | Y) ^ X) --> 0 define i8 @or_xor_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @or_xor_commute1( -; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[OR]], [[X]] -; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[OR]], [[Y]] -; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR2]], [[XOR1]] -; CHECK-NEXT: ret i8 [[AND]] +; CHECK-NEXT: ret i8 0 ; %or = or i8 %x, %y %xor1 = xor i8 %or, %x @@ -166,15 +158,11 @@ define i8 @or_xor_commute1(i8 %x, i8 %y) { ret i8 %and } -; (A ^ (A | B) ) & (B ^ (A | B)) +; (X ^ (X | Y) ) & (Y ^ (X | Y)) --> 0 define i71 @or_xor_commute2(i71 %x, i71 %y) { ; CHECK-LABEL: @or_xor_commute2( -; CHECK-NEXT: [[OR:%.*]] = or i71 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor i71 [[X]], [[OR]] -; CHECK-NEXT: [[XOR2:%.*]] = xor i71 [[Y]], [[OR]] -; CHECK-NEXT: [[AND:%.*]] = and i71 [[XOR1]], [[XOR2]] -; CHECK-NEXT: ret i71 [[AND]] +; CHECK-NEXT: ret i71 0 ; %or = or i71 %x, %y %xor1 = xor i71 %x, %or @@ -183,15 +171,11 @@ define i71 @or_xor_commute2(i71 %x, i71 %y) { ret i71 %and } -; (B ^ (A | B) ) & (A ^ (A | B)) +; (Y ^ (X | Y) ) & (X ^ (X | Y)) --> 0 define <2 x i64> @or_xor_commute3(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @or_xor_commute3( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i64> [[Y]], [[OR]] -; CHECK-NEXT: [[XOR2:%.*]] = xor <2 x i64> [[X]], [[OR]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[XOR1]], [[XOR2]] -; CHECK-NEXT: ret <2 x i64> [[AND]] +; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %or = or <2 x i64> %x, %y %xor1 = xor <2 x i64> %y, %or @@ -199,3 +183,86 @@ define <2 x i64> @or_xor_commute3(<2 x i64> %x, <2 x i64> %y) { %and = and <2 x i64> %xor1, %xor2 ret <2 x i64> %and } + +; ((X | Y) ^ X ) & (Y ^ (X | Y)) --> 0 + +define i32 @or_xor_commute4(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute4( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %or, %x + %xor2 = xor i32 %y, %or + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; ((X | Y) ^ Y ) & (X ^ (X | Y)) --> 0 + +define i32 @or_xor_commute5(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute5( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %or, %y + %xor2 = xor i32 %x, %or + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; (X ^ (X | Y) ) & ((X | Y) ^ Y) --> 0 + +define i32 @or_xor_commute6(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute6( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %x, %or + %xor2 = xor i32 %or, %y + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; (Y ^ (X | Y) ) & ((X | Y) ^ X) --> 0 + +define i32 @or_xor_commute7(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute7( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %y, %or + %xor2 = xor i32 %or, %x + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; (Y ^ (X | Y) ) & ((X | Y) ^ X) --> 0 + +define i32 @or_xor_complex_op(i32 %x, i32 %in) { +; CHECK-LABEL: @or_xor_complex_op( +; CHECK-NEXT: ret i32 0 +; + %y = or i32 %in, 1 + %or = or i32 %x, %y + %xor1 = xor i32 %y, %or + %xor2 = xor i32 %or, %x + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +define i32 @or_xor_limitation(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_limitation( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[Y]], [[OR1]] +; 
CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[OR2]], [[X]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR1]], [[XOR2]] +; CHECK-NEXT: ret i32 [[AND]] +; + %or1 = or i32 %y, %x + %or2 = or i32 %x, %y + %xor1 = xor i32 %y, %or1 + %xor2 = xor i32 %or2, %x + %and = and i32 %xor1, %xor2 + ret i32 %and +} diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll index 4db5fbff30af..5b8d615a6770 100644 --- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll +++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll @@ -282,6 +282,7 @@ entry: define double @fadd_05() #0 { ; CHECK-LABEL: @fadd_05( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret double 3.000000e+00 ; entry: @@ -293,6 +294,7 @@ entry: define double @fadd_06() #0 { ; CHECK-LABEL: @fadd_06( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret double 3.000000e+00 ; entry: @@ -415,6 +417,120 @@ entry: } +; When exceptions are ignored, comparison of constants can be folded, even for (signaling) NaNs. +define i1 @cmp_eq_01() #0 { +; CHECK-LABEL: @cmp_eq_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 1.000000e+00, double 2.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 1.0, double 2.0, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_02() #0 { +; CHECK-LABEL: @cmp_eq_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.000000e+00, double 2.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 2.0, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_03() #0 { +; CHECK-LABEL: @cmp_eq_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.000000e+00, double 0x7FF8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 0x7ff8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_04() #0 { +; CHECK-LABEL: @cmp_eq_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.000000e+00, double 0x7FF4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 0x7ff4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_05() #0 { +; CHECK-LABEL: @cmp_eq_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.000000e+00, double 0x7FF8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; 
CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.0, double 0x7ff8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_06() #0 { +; CHECK-LABEL: @cmp_eq_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.000000e+00, double 0x7FF4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.0, double 0x7ff4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +; Compare with SNAN is NOT folded if the exception behavior mode is not 'ignore'. +define i1 @cmp_eq_nan_01() #0 { +; CHECK-LABEL: @cmp_eq_nan_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7FF4000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7ff4000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +define i1 @cmp_eq_nan_02() #0 { +; CHECK-LABEL: @cmp_eq_nan_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7FF4000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7ff4000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +; Compare with QNAN is folded for fcmp but is NOT folded for fcmps if the exception behavior mode is not 'ignore'. 
+define i1 @cmp_eq_nan_03() #0 { +; CHECK-LABEL: @cmp_eq_nan_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7FF8000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7ff8000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +define i1 @cmp_eq_nan_04() #0 { +; CHECK-LABEL: @cmp_eq_nan_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7FF8000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7ff8000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + + attributes #0 = { strictfp } declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) @@ -433,4 +549,6 @@ declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll index 82101a4ef828..39222b1916af 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll @@ -12,6 +12,7 @@ define float @fdiv_constant_fold() #0 { define float @fdiv_constant_fold_strict() #0 { ; CHECK-LABEL: @fdiv_constant_fold_strict( +; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]] ; CHECK-NEXT: ret float 1.500000e+00 ; %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -21,7 +22,7 @@ define float @fdiv_constant_fold_strict() #0 { define float @fdiv_constant_fold_strict2() #0 { ; CHECK-LABEL: @fdiv_constant_fold_strict2( -; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret float [[F]] ; %f = call float @llvm.experimental.constrained.fdiv.f32(float 2.0, float 3.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -39,6 +40,7 @@ define float @frem_constant_fold() #0 { define float @frem_constant_fold_strict() #0 { ; CHECK-LABEL: @frem_constant_fold_strict( +; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.frem.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret float 1.000000e+00 ; %f = call float 
@llvm.experimental.constrained.frem.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll index 61b2cafc63ad..61d06e964ac5 100644 --- a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll +++ b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll @@ -363,6 +363,7 @@ define float @fold_fadd_qnan_qnan_ebmaytrap() #0 { define float @fold_fadd_qnan_qnan_ebstrict() #0 { ; CHECK-LABEL: @fold_fadd_qnan_qnan_ebstrict( +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float 0x7FF8000000000000, float 0x7FF8000000000000, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret float 0x7FF8000000000000 ; %add = call float @llvm.experimental.constrained.fadd.f32(float 0x7ff8000000000000, float 0x7ff8000000000000, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll new file mode 100644 index 000000000000..3e08b41744f8 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes='default' -mtriple=powerpc64le-- -o - %s | \ +; RUN: FileCheck %s + +%0 = type <{ double }> + +define dso_local void @test(i32* %arg) #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB16:%.*]] +; CHECK: bb16: +; CHECK-NEXT: [[I20:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20]]) +; CHECK-NEXT: [[I24_ELT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_1]]) +; CHECK-NEXT: [[I24_ELT_1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_1]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_1]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_1]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_1]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_2]]) +; CHECK-NEXT: [[I24_ELT_2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_2]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_2]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_2]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_2]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_3:%.*]] = tail 
call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_3]]) +; CHECK-NEXT: [[I24_ELT_3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_3]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_3]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_3]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_3]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_4:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_4]]) +; CHECK-NEXT: [[I24_ELT_4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_4]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_4]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_4]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_4]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_5:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_5:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_5]]) +; CHECK-NEXT: [[I24_ELT_5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_5]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_5]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_5]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_5]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_6:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_6:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_6]]) +; CHECK-NEXT: [[I24_ELT_6:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_6]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_6]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_6:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_6]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_6]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_7:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_7:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_7]]) +; CHECK-NEXT: [[I24_ELT_7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_7]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_7]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_7]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_7]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_8:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_8:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_8]]) +; CHECK-NEXT: [[I24_ELT_8:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_8]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_8]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_8:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_8]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_8]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_9:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr 
(i64 -32 to i8*)) +; CHECK-NEXT: [[I24_9:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_9]]) +; CHECK-NEXT: [[I24_ELT_9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_9]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_9]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_9]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_9]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_10:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_10:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_10]]) +; CHECK-NEXT: [[I24_ELT_10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_10]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_10]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_10]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_10]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_11:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_11:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_11]]) +; CHECK-NEXT: [[I24_ELT_11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_11]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_11]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_11]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_11]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_12:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_12:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_12]]) +; CHECK-NEXT: [[I24_ELT_12:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_12]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_12]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_12:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_12]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_12]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_13:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_13:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_13]]) +; CHECK-NEXT: [[I24_ELT_13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_13]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_13]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_13]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_13]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_14:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_14:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_14]]) +; CHECK-NEXT: [[I24_ELT_14:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_14]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_14]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_14:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_14]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_14]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), 
align 64 +; CHECK-NEXT: [[I20_15:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_15:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_15]]) +; CHECK-NEXT: [[I24_ELT_15:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_15]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_15]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_15:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_15]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_15]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: br label [[BB16]], !llvm.loop [[LOOP0:![0-9]+]] +; +bb: + %i = alloca i32*, align 8 + store i32* %arg, i32** %i, align 8 + %i1 = alloca [0 x %0]*, align 8 + %i2 = alloca double*, align 8 + %i3 = alloca i32, align 4 + %i4 = alloca i32, align 4 + %i5 = alloca i64, align 8 + %i6 = alloca i64, align 8 + %i7 = alloca <256 x i1>, align 32 + %i8 = load i32*, i32** %i, align 8 + %i9 = load i32, i32* %i8, align 4 + %i10 = sub nsw i32 %i9, 0 + store i32 %i10, i32* %i4, align 4 + %i11 = load i32, i32* %i4, align 4 + %i12 = ashr i32 %i11, 5 + %i13 = sext i32 %i12 to i64 + %i14 = load i64, i64* %i6, align 8 + %i15 = sub nsw i64 %i14, 1 + br label %bb16 + +bb16: ; preds = %bb16, %bb + %i17 = load i64, i64* %i5, align 8 + %i18 = icmp sge i64 %i17, 1 + %i19 = getelementptr i8, i8* null, i64 -32 + %i20 = call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i19) + store <256 x i1> %i20, <256 x i1>* %i7, align 32 + %i21 = getelementptr inbounds i8, i8* null, i64 48 + %i22 = bitcast i8* %i21 to <2 x double>* + %i23 = load <256 x i1>, <256 x i1>* %i7, align 32 + %i24 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i23) + %i25 = bitcast <2 x double>* %i22 to { <16 x i8>, <16 x i8> }* + store { <16 x i8>, <16 x i8> } %i24, { <16 x i8>, <16 x i8> }* %i25, align 16 + br label %bb16, !llvm.loop !1 +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*) #1 + +; Function Attrs: nounwind readnone +declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>) #2 + +attributes #0 = { "no-trapping-math"="true" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind readnone } + +!1 = distinct !{!1, !2, !3, !4} +!2 = !{!"llvm.loop.vectorize.width", i32 1} +!3 = !{!"llvm.loop.interleave.count", i32 1} +!4 = !{!"llvm.loop.unroll.count", i32 16} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll new file mode 100644 index 000000000000..3349d4554491 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=riscv64 -mattr=+experimental-v -loop-vectorize < %s | FileCheck %s + +; Make sure we don't unroll scalar loops in the loop vectorizer. 
+; +define void @small_loop(i32* nocapture %inArray, i32 %size) nounwind { +; CHECK-LABEL: @small_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV1:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[INARRAY:%.*]], i32 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], 6 +; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP1]], align 4 +; CHECK-NEXT: [[IV1]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV1]], [[SIZE]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %0 = icmp sgt i32 %size, 0 + br i1 %0, label %loop, label %exit + +loop: ; preds = %entry, %loop + %iv = phi i32 [ %iv1, %loop ], [ 0, %entry ] + %1 = getelementptr inbounds i32, i32* %inArray, i32 %iv + %2 = load i32, i32* %1, align 4 + %3 = add nsw i32 %2, 6 + store i32 %3, i32* %1, align 4 + %iv1 = add i32 %iv, 1 + %cond = icmp eq i32 %iv1, %size + br i1 %cond, label %exit, label %loop + +exit: ; preds = %loop, %entry + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll index 07201c0dfd78..47ac8e424fd6 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll @@ -7,9 +7,9 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; in deterministic order. 
; CHECK-LABEL: @foo( ; CHECK: vector.body: -; CHECK: icmp ule <4 x i64> -; CHECK-NEXT: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 +; CHECK: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 ; CHECK-NEXT: %[[VAR2:.*]] = add <4 x i32> %vec.phi, +; CHECK-NEXT: icmp ule <4 x i64> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR2]], <4 x i32> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR1]], <4 x i32> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 70920bd2a986..e487b3a34f78 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -33,9 +33,9 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 2270e4fb406b..49d2c8329b30 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -1370,26 +1370,17 @@ TEST_F(AArch64GISelMITest, FewerElementsAnd) { CHECK: [[IMP_DEF0:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF CHECK: [[IMP_DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF CHECK: [[VALUE0:%[0-9]+]]:_(s32), [[VALUE1:%[0-9]+]]:_(s32), [[VALUE2:%[0-9]+]]:_(s32), [[VALUE3:%[0-9]+]]:_(s32), [[VALUE4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF0]]:_(<5 x s32>) - CHECK: [[IMP_DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF CHECK: [[VECTOR0:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE0]]:_(s32), [[VALUE1]]:_(s32) CHECK: [[VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE2]]:_(s32), [[VALUE3]]:_(s32) - CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE4]]:_(s32), [[IMP_DEF2]]:_(s32) - CHECK: [[IMP_DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF CHECK: [[VALUE5:%[0-9]+]]:_(s32), [[VALUE6:%[0-9]+]]:_(s32), [[VALUE7:%[0-9]+]]:_(s32), [[VALUE8:%[0-9]+]]:_(s32), [[VALUE9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF1]]:_(<5 x s32>) - CHECK: [[IMP_DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32) - CHECK: [[VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32) - CHECK: [[VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE9]]:_(s32), [[IMP_DEF4]]:_(s32) - CHECK: [[IMP_DEF5:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - - CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR3]]:_ - CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR4]]:_ - CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR2]]:_, [[VECTOR5]]:_ - CHECK: 
[[IMP_DEF6:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - - CHECK: [[VECTOR6:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>) - CHECK: [[VECTOR7:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>) - CHECK: [[VECTOR8:%[0-9]+]]:_(<5 x s32>), [[VECTOR9:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[VECTOR7]]:_(<10 x s32>) + CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32) + CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32) + CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR2]]:_ + CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR3]]:_ + CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[VALUE4]]:_, [[VALUE9]]:_ + CHECK: [[AND0_E0:%[0-9]+]]:_(s32), [[AND0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND0]]:_(<2 x s32>) + CHECK: [[AND1_E0:%[0-9]+]]:_(s32), [[AND1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]]:_(<2 x s32>) + CHECK: [[RESULT:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[AND0_E0]]:_(s32), [[AND0_E1]]:_(s32), [[AND1_E0]]:_(s32), [[AND1_E1]]:_(s32), [[AND2]]:_(s32) )"; // Check @@ -1428,12 +1419,19 @@ TEST_F(AArch64GISelMITest, MoreElementsAnd) { auto CheckStr = R"( CHECK: [[BITCAST0:%[0-9]+]]:_(<2 x s32>) = G_BITCAST CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST - CHECK: [[IMP_DEF0:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CONCAT0:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>) - CHECK: [[IMP_DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CONCAT1:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>) - CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[CONCAT0]]:_, [[CONCAT1]]:_ - CHECK: (<2 x s32>) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>) + + CHECK: [[BITCAST0_E0:%[0-9]+]]:_(s32), [[BITCAST0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST0]]:_(<2 x s32>) + CHECK: [[IMP_DEF0:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST0_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST0_E0]]:_(s32), [[BITCAST0_E1]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32) + + CHECK: [[BITCAST1_E0:%[0-9]+]]:_(s32), [[BITCAST1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]]:_(<2 x s32>) + CHECK: [[IMP_DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST1_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST1_E0]]:_(s32), [[BITCAST1_E1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32) + + CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[BITCAST0_LARGE]]:_, [[BITCAST1_LARGE]]:_ + + CHECK: [[AND_E0:%[0-9]+]]:_(s32), [[AND_E1:%[0-9]+]]:_(s32), [[AND_E2:%[0-9]+]]:_(s32), [[AND_E3:%[0-9]+]]:_(s32), [[AND_E4:%[0-9]+]]:_(s32), [[AND_E5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>) + CHECK: (<2 x s32>) = G_BUILD_VECTOR [[AND_E0]]:_(s32), [[AND_E1]]:_(s32) )"; EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; @@ -1509,26 +1507,26 @@ TEST_F(AArch64GISelMITest, FewerElementsPhi) { auto CheckStr = R"( CHECK: [[INITVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - CHECK: [[EXTRACT0:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 0 - CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x 
s32>), 64 - CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 128 + CHECK: [[INITVAL_E0:%[0-9]+]]:_(s32), [[INITVAL_E1:%[0-9]+]]:_(s32), [[INITVAL_E2:%[0-9]+]]:_(s32), [[INITVAL_E3:%[0-9]+]]:_(s32), [[INITVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INITVAL]]:_(<5 x s32>) + CHECK: [[INITVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E0]]:_(s32), [[INITVAL_E1]]:_(s32) + CHECK: [[INITVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E2]]:_(s32), [[INITVAL_E3]]:_(s32) CHECK: G_BRCOND CHECK: [[MIDVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 0 - CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 64 - CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 128 + CHECK: [[MIDVAL_E0:%[0-9]+]]:_(s32), [[MIDVAL_E1:%[0-9]+]]:_(s32), [[MIDVAL_E2:%[0-9]+]]:_(s32), [[MIDVAL_E3:%[0-9]+]]:_(s32), [[MIDVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MIDVAL]]:_(<5 x s32>) + CHECK: [[MIDVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E0]]:_(s32), [[MIDVAL_E1]]:_(s32) + CHECK: [[MIDVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E2]]:_(s32), [[MIDVAL_E3]]:_(s32) CHECK: G_BR - CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT0]]:_(<2 x s32>), %bb.0, [[EXTRACT3]]:_(<2 x s32>), %bb.1 - CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT1]]:_(<2 x s32>), %bb.0, [[EXTRACT4]]:_(<2 x s32>), %bb.1 - CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1 - - CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI - + CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E01]]:_(<2 x s32>), %bb.0, [[MIDVAL_E01]]:_(<2 x s32>), %bb.1 + CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E23]]:_(<2 x s32>), %bb.0, [[MIDVAL_E23]]:_(<2 x s32>), %bb.1 + CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[INITVAL_E4]]:_(s32), %bb.0, [[MIDVAL_E4]]:_(s32), %bb.1 CHECK: [[UNMERGE0:%[0-9]+]]:_(s32), [[UNMERGE1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI0]]:_(<2 x s32>) CHECK: [[UNMERGE2:%[0-9]+]]:_(s32), [[UNMERGE3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI1]]:_(<2 x s32>) CHECK: [[BV:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UNMERGE0]]:_(s32), [[UNMERGE1]]:_(s32), [[UNMERGE2]]:_(s32), [[UNMERGE3]]:_(s32), [[PHI2]]:_(s32) + + CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI + CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[BV]]:_, [[BV]]:_ )"; @@ -3196,14 +3194,8 @@ TEST_F(AArch64GISelMITest, LowerInsert) { CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_ CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[OR]] - CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[V2S32]] - CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[S32]] - CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT - CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]]:_, [[C]]:_(s64) - CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT - CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[BITCAST]]:_, [[C]]:_ - CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_ - CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[OR]] + CHECK: [[V2S32_E0:%[0-9]+]]:_(s32), [[V2S32_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[V2S32]] + CHECK: [[BV:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[V2S32_E0]]:_(s32), [[S32]]:_(s32) )"; // Check @@ -3711,20 +3703,22 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { DefineLegalizerInfo(A, {}); // Make sure that G_FREEZE is narrowed using unmerge/extract - LLT S16{LLT::scalar(16)}; LLT S32{LLT::scalar(32)}; LLT S33{LLT::scalar(33)}; + LLT S48{LLT::scalar(48)}; LLT S64{LLT::scalar(64)}; LLT 
V2S16{LLT::fixed_vector(2, 16)}; - LLT V2S32{LLT::fixed_vector(2, 32)}; + LLT V3S16{LLT::fixed_vector(3, 16)}; + LLT V4S16{LLT::fixed_vector(4, 16)}; auto Trunc = B.buildTrunc(S33, {Copies[0]}); - auto Vector = B.buildBitcast(V2S32, Copies[0]); + auto Trunc1 = B.buildTrunc(S48, {Copies[0]}); + auto Vector = B.buildBitcast(V3S16, Trunc1); auto FreezeScalar = B.buildInstr(TargetOpcode::G_FREEZE, {S64}, {Copies[0]}); auto FreezeOdd = B.buildInstr(TargetOpcode::G_FREEZE, {S33}, {Trunc}); - auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); - auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); + auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector}); + auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector}); AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; @@ -3736,54 +3730,48 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeScalar, 0, S32)); + // This should be followed by narrowScalar to S32. B.setInsertPt(*EntryMBB, FreezeOdd->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeOdd, 0, S32)); + Helper.widenScalar(*FreezeOdd, 0, S64)); B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeVector, 0, V2S16)); + Helper.fewerElementsVector(*FreezeVector, 0, V2S16)); + // This should be followed by fewerElements to V2S16. B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeVector1, 0, S16)); + Helper.moreElementsVector(*FreezeVector1, 0, V4S16)); const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]] - CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]] + CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]] CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]] CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[UV]] CHECK: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[UV1]] CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE]]:_(s32), [[FREEZE1]] - CHECK: (s1) = G_UNMERGE_VALUES [[TRUNC]]:_(s33) - CHECK: [[UNDEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES - CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES - CHECK: [[UNDEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[MV1]] - CHECK: [[FREEZE3:%[0-9]+]]:_(s32) = G_FREEZE [[MV2]] - CHECK: [[UNDEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[MV3:%[0-9]+]]:_(s1056) = G_MERGE_VALUES [[FREEZE2]]:_(s32), [[FREEZE3]]:_(s32), [[UNDEF2]] - CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[MV3]] - - CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] - CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]] - CHECK: [[FREEZE4:%[0-9]+]]:_(s32) = G_FREEZE [[UV2]] - CHECK: [[FREEZE5:%[0-9]+]]:_(s32) = G_FREEZE [[UV3]] - CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE4]]:_(s32), [[FREEZE5]]:_(s32) - CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV4]] - - CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] - CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST3]] - CHECK: [[FREEZE6:%[0-9]+]]:_(s16) = G_FREEZE 
[[UV4]] - CHECK: [[FREEZE7:%[0-9]+]]:_(s16) = G_FREEZE [[UV5]] - CHECK: [[FREEZE8:%[0-9]+]]:_(s16) = G_FREEZE [[UV6]] - CHECK: [[FREEZE9:%[0-9]+]]:_(s16) = G_FREEZE [[UV7]] - CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE6]]:_(s16), [[FREEZE7]]:_(s16), [[FREEZE8]]:_(s16), [[FREEZE9]] - CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV5]] + CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]] + CHECK: [[FREEZE2:%[0-9]+]]:_(s64) = G_FREEZE [[ANYEXT]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[FREEZE2]] + + CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[BV:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]]:_(s16), [[UV3]]:_(s16) + CHECK: [[FREEZE3:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[BV]] + CHECK: [[FREEZE4:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]] + CHECK: [[FREEZE3_E0:%[0-9]+]]:_(s16), [[FREEZE3_E1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE3]] + CHECK: [[BV1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE3_E0]]:_(s16), [[FREEZE3_E1]]:_(s16), [[FREEZE4]]:_(s16) + + CHECK: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[IMP_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + CHECK: [[BV1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV5]]:_(s16), [[UV6]]:_(s16), [[UV7]]:_(s16), [[IMP_DEF]]:_(s16) + CHECK: [[FREEZE5:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[BV1]] + CHECK: [[FREEZE5_E0:%[0-9]+]]:_(s16), [[FREEZE5_E1:%[0-9]+]]:_(s16), [[FREEZE5_E2:%[0-9]+]]:_(s16), [[FREEZE5_E3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE5]] + CHECK: [[BV2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE5_E0]]:_(s16), [[FREEZE5_E1]]:_(s16), [[FREEZE5_E2]]:_(s16) )"; // Check @@ -3869,10 +3857,12 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] - CHECK: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CV:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST]]:_(<2 x s32>), [[UNDEF]] - CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[CV]] - CHECK: [[EXTR0:%[0-9]+]]:_(<2 x s32>), [[EXTR1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) + CHECK: [[BITCAST_E0:%[0-9]+]]:_(s32), [[BITCAST_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]]:_(<2 x s32>) + CHECK: [[IMP_DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST_LARGE:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST_E0]]:_(s32), [[BITCAST_E1]]:_(s32), [[IMP_DEF]]:_(s32), [[IMP_DEF]]:_(s32) + CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BITCAST_LARGE]] + CHECK: [[FREEZE_E0:%[0-9]+]]:_(s32), [[FREEZE_E1:%[0-9]+]]:_(s32), [[FREEZE_E2:%[0-9]+]]:_(s32), [[FREEZE_E3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) + CHECK: (<2 x s32>) = G_BUILD_VECTOR [[FREEZE_E0]]:_(s32), [[FREEZE_E1]]:_(s32) )"; // Check @@ -4040,14 +4030,19 @@ TEST_F(AArch64GISelMITest, moreElementsShuffle) { CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY CHECK: [[BV1:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR CHECK: [[BV2:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR - CHECK: [[IMPDEF1:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[INSERT1:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF1]]:_, [[BV1]]:_(<6 x s64>), 0 - CHECK: [[IMPDEF2:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[INSERT2:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF2]]:_, [[BV2]]:_(<6 x s64>), 0 - CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[INSERT1]]:_(<8 x s64>), [[INSERT2]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) - 
CHECK: [[IMPDEF3:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[CONCAT:%[0-9]+]]:_(<24 x s64>) = G_CONCAT_VECTORS [[SHUF]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>) - CHECK: [[UNMERGE:%[0-9]+]]:_(<6 x s64>), [[UNMERGE2:%[0-9]+]]:_(<6 x s64>), [[UNMERGE3:%[0-9]+]]:_(<6 x s64>), [[UNMERGE4:%[0-9]+]]:_(<6 x s64>) = G_UNMERGE_VALUES [[CONCAT]]:_(<24 x s64>) + + CHECK: [[BV1_E0:%[0-9]+]]:_(s64), [[BV1_E1:%[0-9]+]]:_(s64), [[BV1_E2:%[0-9]+]]:_(s64), [[BV1_E3:%[0-9]+]]:_(s64), [[BV1_E4:%[0-9]+]]:_(s64), [[BV1_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV1]]:_(<6 x s64>) + CHECK: [[IMP_DEF0:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + CHECK: [[BV1_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV1_E0]]:_(s64), [[BV1_E1]]:_(s64), [[BV1_E2]]:_(s64), [[BV1_E3]]:_(s64), [[BV1_E4]]:_(s64), [[BV1_E5]]:_(s64), [[IMP_DEF0]]:_(s64), [[IMP_DEF0]]:_(s64) + + CHECK: [[BV2_E0:%[0-9]+]]:_(s64), [[BV2_E1:%[0-9]+]]:_(s64), [[BV2_E2:%[0-9]+]]:_(s64), [[BV2_E3:%[0-9]+]]:_(s64), [[BV2_E4:%[0-9]+]]:_(s64), [[BV2_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV2]]:_(<6 x s64>) + CHECK: [[IMP_DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + CHECK: [[BV2_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV2_E0]]:_(s64), [[BV2_E1]]:_(s64), [[BV2_E2]]:_(s64), [[BV2_E3]]:_(s64), [[BV2_E4]]:_(s64), [[BV2_E5]]:_(s64), [[IMP_DEF1]]:_(s64), [[IMP_DEF1]]:_(s64) + + CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[BV1_LARGE]]:_(<8 x s64>), [[BV2_LARGE]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) + + CHECK: [[SHUF_E0:%[0-9]+]]:_(s64), [[SHUF_E1:%[0-9]+]]:_(s64), [[SHUF_E2:%[0-9]+]]:_(s64), [[SHUF_E3:%[0-9]+]]:_(s64), [[SHUF_E4:%[0-9]+]]:_(s64), [[SHUF_E5:%[0-9]+]]:_(s64), [[SHUF_E6:%[0-9]+]]:_(s64), [[SHUF_E7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]]:_(<8 x s64>) + CHECK: (<6 x s64>) = G_BUILD_VECTOR [[SHUF_E0]]:_(s64), [[SHUF_E1]]:_(s64), [[SHUF_E2]]:_(s64), [[SHUF_E3]]:_(s64), [[SHUF_E4]]:_(s64), [[SHUF_E5]]:_(s64) )"; // Check diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index 68d06003a5e5..ddd057aa1121 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -43,8 +43,8 @@ namespace { template void TestAllForms() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test that we can decode all DW_FORM values correctly. const AddrType AddrValue = (AddrType)0x0123456789abcdefULL; @@ -477,8 +477,8 @@ TEST(DWARFDebugInfo, TestDWARF32Version5Addr8AllForms) { template void TestChildren() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test that we can decode DW_FORM_ref_addr values correctly in DWARF 2 with // 4 byte addresses. DW_FORM_ref_addr values should be 4 bytes when using @@ -619,8 +619,8 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Children) { template void TestReferences() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test that we can decode DW_FORM_refXXX values correctly in DWARF. 
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); @@ -881,8 +881,8 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8References) { template void TestAddresses() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test the DWARF APIs related to accessing the DW_AT_low_pc and // DW_AT_high_pc. @@ -1069,8 +1069,8 @@ TEST(DWARFDebugInfo, DISABLED_TestStringOffsets) { TEST(DWARFDebugInfo, TestStringOffsets) { #endif Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); const char *String1 = "Hello"; const char *String2 = "World"; @@ -1133,8 +1133,8 @@ TEST(DWARFDebugInfo, TestStringOffsets) { TEST(DWARFDebugInfo, TestEmptyStringOffsets) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); const char *String1 = "Hello"; @@ -1162,8 +1162,8 @@ TEST(DWARFDebugInfo, TestEmptyStringOffsets) { TEST(DWARFDebugInfo, TestRelations) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test the DWARF APIs related to accessing the DW_AT_low_pc and // DW_AT_high_pc. @@ -1349,8 +1349,8 @@ TEST(DWARFDebugInfo, TestDWARFDie) { TEST(DWARFDebugInfo, TestChildIterators) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test the DWARF APIs related to iterating across the children of a DIE using // the DWARFDie::iterator class. @@ -1458,8 +1458,8 @@ TEST(DWARFDebugInfo, TestEmptyChildren) { TEST(DWARFDebugInfo, TestAttributeIterators) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test the DWARF APIs related to iterating across all attribute values in a // a DWARFDie. @@ -1520,8 +1520,8 @@ TEST(DWARFDebugInfo, TestAttributeIterators) { TEST(DWARFDebugInfo, TestFindRecurse) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); uint16_t Version = 4; auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); @@ -1734,8 +1734,8 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) { TEST(DWARFDebugInfo, TestFindAttrs) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); // Test the DWARFDie::find() and DWARFDie::findRecursively() that take an // ArrayRef value to make sure they work correctly. 
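The hunks above and below all make the same mechanical change: when the DWARF generator cannot emit objects for the configured target, the test now calls GTEST_SKIP() so the run is reported as skipped, instead of returning early and counting as a silent pass. A minimal sketch of that pattern, assuming a hypothetical isHypotheticalConfigSupported() helper (only the TEST macro, GTEST_SKIP(), and EXPECT_TRUE are real GoogleTest APIs):

#include "gtest/gtest.h"

// Hypothetical stand-in for the isConfigurationSupported() check used by the
// real tests; it exists only to demonstrate the skip-instead-of-return pattern.
static bool isHypotheticalConfigSupported() { return false; }

TEST(SkipPatternExample, ReportsSkipInsteadOfSilentPass) {
  if (!isHypotheticalConfigSupported())
    GTEST_SKIP() << "target configuration not supported";
  // Assertions below only run when the configuration is available.
  EXPECT_TRUE(true);
}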
@@ -1797,8 +1797,8 @@ TEST(DWARFDebugInfo, TestFindAttrs) { TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); uint16_t Version = 5; auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); @@ -1923,7 +1923,7 @@ TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) { TEST(DWARFDebugInfo, TestErrorReporting) { Triple Triple("x86_64-pc-linux"); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); auto ExpectedDG = dwarfgen::Generator::create(Triple, 4 /*DwarfVersion*/); ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded()); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index a8502df2b34b..731aa4f0f3d0 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -36,16 +36,21 @@ struct CommonFixture { EXPECT_FALSE(Unrecoverable); } - bool setupGenerator(uint16_t Version = 4, uint8_t AddrSize = 8) { + // Note: ASSERT_THAT_EXPECTED cannot be used in a non-void function, so + // setupGenerator() is split into two. + void setupGeneratorImpl(uint16_t Version, uint8_t AddrSize) { AddressSize = AddrSize; - Triple T = - getDefaultTargetTripleForAddrSize(AddressSize == 0 ? 8 : AddressSize); + Triple T = getDefaultTargetTripleForAddrSize(AddressSize ? AddressSize : 8); if (!isConfigurationSupported(T)) - return false; + return; auto ExpectedGenerator = Generator::create(T, Version); - if (ExpectedGenerator) - Gen.reset(ExpectedGenerator->release()); - return true; + ASSERT_THAT_EXPECTED(ExpectedGenerator, Succeeded()); + Gen = std::move(*ExpectedGenerator); + } + + bool setupGenerator(uint16_t Version = 4, uint8_t AddrSize = 8) { + setupGeneratorImpl(Version, AddrSize); + return Gen != nullptr; } void generate() { @@ -60,8 +65,7 @@ struct CommonFixture { } std::unique_ptr createContext() { - if (!Gen) - return nullptr; + assert(Gen != nullptr && "Generator is not set up"); StringRef FileBytes = Gen->generate(); MemoryBufferRef FileBuffer(FileBytes, "dwarf"); auto Obj = object::ObjectFile::createObjectFile(FileBuffer); @@ -183,7 +187,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_GetOrParseLineTableAtInvalidOffset) { TEST_F(DebugLineBasicFixture, GetOrParseLineTableAtInvalidOffset) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); generate(); EXPECT_THAT_EXPECTED( @@ -210,7 +214,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, GetOrParseLineTableAtInvalidOffsetAfterData) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0, LineTable::Byte}}); @@ -235,7 +239,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_PrologueGetLength) { TEST_P(DebugLineParameterisedFixture, PrologueGetLength) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(Format); DWARFDebugLine::Prologue Prologue = LT.createBasicPrologue(); LT.setPrologue(Prologue); @@ -262,7 +266,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_GetOrParseLineTableValidTable) { TEST_P(DebugLineParameterisedFixture, GetOrParseLineTableValidTable) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + (Format == DWARF64 ? 
"DWARF64" : "DWARF32")); @@ -332,7 +336,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForReservedLength) { TEST_F(DebugLineBasicFixture, ErrorForReservedLength) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); @@ -360,7 +364,7 @@ TEST_P(DebugLineUnsupportedVersionFixture, TEST_P(DebugLineUnsupportedVersionFixture, ErrorForUnsupportedVersion) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue( @@ -386,7 +390,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForInvalidV5IncludeDirTable) { TEST_F(DebugLineBasicFixture, ErrorForInvalidV5IncludeDirTable) { #endif if (!setupGenerator(5)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({ @@ -431,7 +435,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_ErrorForTooLargePrologueLength) { TEST_P(DebugLineParameterisedFixture, ErrorForTooLargePrologueLength) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + (Format == DWARF64 ? "DWARF64" : "DWARF32")); @@ -471,7 +475,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_ErrorForTooShortPrologueLength) { TEST_P(DebugLineParameterisedFixture, ErrorForTooShortPrologueLength) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + (Format == DWARF64 ? "DWARF64" : "DWARF32")); @@ -530,7 +534,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ErrorForExtendedOpcodeLengthSmallerThanExpected) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addByte(0xaa); @@ -564,7 +568,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ErrorForExtendedOpcodeLengthLargerThanExpected) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addByte(0xaa); @@ -597,7 +601,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForUnitLengthTooLarge) { TEST_F(DebugLineBasicFixture, ErrorForUnitLengthTooLarge) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable &Padding = Gen->addLineTable(); // Add some padding to show that a non-zero offset is handled correctly. @@ -630,7 +634,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForMismatchedAddressSize) { TEST_F(DebugLineBasicFixture, ErrorForMismatchedAddressSize) { #endif if (!setupGenerator(4, 8)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // The line data extractor expects size 8 (Quad) addresses. @@ -665,7 +669,7 @@ TEST_F(DebugLineBasicFixture, ErrorForMismatchedAddressSizeUnsetInitialAddress) { #endif if (!setupGenerator(4, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); uint64_t Addr1 = 0x11223344; @@ -699,7 +703,7 @@ TEST_F(DebugLineBasicFixture, // Use DWARF v4, and 0 for data extractor address size so that the address // size is derived from the opcode length. if (!setupGenerator(4, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // 4 == length of the extended opcode, i.e. 1 for the opcode itself and 3 for @@ -735,7 +739,7 @@ TEST_F(DebugLineBasicFixture, ErrorForAddressSizeGreaterThanByteSize) { // Use DWARF v4, and 0 for data extractor address size so that the address // size is derived from the opcode length. 
if (!setupGenerator(4, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // Specifically use an operand size that has a trailing byte of a supported @@ -764,7 +768,7 @@ TEST_F(DebugLineBasicFixture, ErrorForUnsupportedAddressSizeDefinedInHeader) { // Use 0 for data extractor address size so that it does not clash with the // header address size. if (!setupGenerator(5, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // AddressSize + 1 == length of the extended opcode, i.e. 1 for the opcode @@ -803,7 +807,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_CallbackUsedForUnterminatedSequence) { TEST_F(DebugLineBasicFixture, CallbackUsedForUnterminatedSequence) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addExtendedOpcode(9, DW_LNE_set_address, @@ -948,7 +952,7 @@ struct AdjustAddressFixtureBase : public CommonFixture { void runTest(bool CheckAdvancePC, Twine MsgSuffix) { if (!setupGenerator(Version)) - return; + GTEST_SKIP(); setupTables(/*AddAdvancePCFirstTable=*/CheckAdvancePC); @@ -1130,7 +1134,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ParserParsesCorrectly) { TEST_F(DebugLineBasicFixture, ParserParsesCorrectly) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); DWARFDebugLine::SectionParser Parser = setupParser(); @@ -1161,7 +1165,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ParserSkipsCorrectly) { TEST_F(DebugLineBasicFixture, ParserSkipsCorrectly) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); DWARFDebugLine::SectionParser Parser = setupParser(); @@ -1186,7 +1190,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ParserAlwaysDoneForEmptySection) { TEST_F(DebugLineBasicFixture, ParserAlwaysDoneForEmptySection) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); generate(); DWARFDebugLine::SectionParser Parser(LineData, *Context, Units); @@ -1201,7 +1205,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserMarkedAsDoneForBadLengthWhenParsing) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); @@ -1229,7 +1233,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserMarkedAsDoneForBadLengthWhenSkipping) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); @@ -1257,7 +1261,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserReportsFirstErrorInEachTableWhenParsing) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.setCustomPrologue({{2, LineTable::Long}, {0, LineTable::Half}}); @@ -1288,7 +1292,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserReportsNonPrologueProblemsWhenParsing) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.addExtendedOpcode(0x42, DW_LNE_end_sequence, {}); @@ -1326,7 +1330,7 @@ TEST_F(DebugLineBasicFixture, ParserReportsPrologueErrorsInEachTableWhenSkipping) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.setCustomPrologue({{2, LineTable::Long}, {0, LineTable::Half}}); @@ -1357,7 +1361,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserIgnoresNonPrologueErrorsWhenSkipping) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.addExtendedOpcode(42, DW_LNE_end_sequence, {}); @@ -1377,7 
+1381,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_VerboseOutput) { TEST_F(DebugLineBasicFixture, VerboseOutput) { #endif if (!setupGenerator(5)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addByte(0); // Extended opcode with zero length. @@ -1537,7 +1541,7 @@ TEST_P(TruncatedPrologueFixture, DISABLED_ErrorForTruncatedPrologue) { TEST_P(TruncatedPrologueFixture, ErrorForTruncatedPrologue) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); LineTable &Padding = Gen->addLineTable(); // Add some padding to show that a non-zero offset is handled correctly. @@ -1719,7 +1723,7 @@ TEST_P(TruncatedExtendedOpcodeFixture, TEST_P(TruncatedExtendedOpcodeFixture, ErrorForTruncatedExtendedOpcode) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = setupTable(); LT.addExtendedOpcode(OpcodeLength, Opcode, Operands); runTest(0); @@ -1803,7 +1807,7 @@ TEST_P(TruncatedStandardOpcodeFixture, TEST_P(TruncatedStandardOpcodeFixture, ErrorForTruncatedStandardOpcode) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = setupTable(); LT.addStandardOpcode(Opcode, Operands); runTest(Opcode); @@ -1863,7 +1867,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_PrintPathsProperly) { TEST_F(DebugLineBasicFixture, PrintPathsProperly) { #endif if (!setupGenerator(5)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); DWARFDebugLine::Prologue P = LT.createBasicPrologue(); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp index 2333fc619edb..6e2ed2f62bcf 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp @@ -24,8 +24,8 @@ TEST(DWARFDie, manualExtractDump) { typedef uint32_t AddrType; uint16_t Version = 4; Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) - return; + if (!isConfigurationSupported(Triple)) + GTEST_SKIP(); auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded()); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp index ae7301b84314..68738e3a3044 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp @@ -51,7 +51,7 @@ void DWARFExpressionCompactPrinterTest::TestExprPrinter( ArrayRef ExprData, StringRef Expected) { // If we didn't build ARM, do not run the test. if (!MRI) - return; + GTEST_SKIP(); // Print the expression, passing in the subprogram DIE, and check that the // result is as expected. diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp index d94341554719..5a6ce2ffbbe8 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp @@ -185,7 +185,7 @@ void DWARFExpressionCopyBytesTest::readAndCheckObjFile( void DWARFExpressionCopyBytesTest::testExpr(ArrayRef ExprData) { // If we didn't build x86, do not run the test. 
if (!MRI) - return; + GTEST_SKIP(); DataExtractor DE(ExprData, true, 8); DWARFExpression Expr(DE, 8); diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp index e337f34d1972..9a4a794b4451 100644 --- a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp @@ -48,12 +48,6 @@ Triple llvm::dwarf::utils::getDefaultTargetTripleForAddrSize(uint8_t AddrSize) { } bool llvm::dwarf::utils::isConfigurationSupported(Triple &T) { - initLLVMIfNeeded(); - std::string Err; - return TargetRegistry::lookupTarget(T.getTriple(), Err); -} - -bool llvm::dwarf::utils::isObjectEmissionSupported(Triple &T) { initLLVMIfNeeded(); std::string Err; const Target *TheTarget = TargetRegistry::lookupTarget(T.getTriple(), Err); diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h index 00eaef25cfba..036071e0b567 100644 --- a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h +++ b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h @@ -21,7 +21,6 @@ namespace utils { Triple getDefaultTargetTripleForAddrSize(uint8_t AddrSize); Triple getNormalizedDefaultTargetTriple(); bool isConfigurationSupported(Triple &T); -bool isObjectEmissionSupported(Triple &T); } // end namespace utils } // end namespace dwarf diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 3de47ab1c4e5..58b3759ba4a5 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -46,5 +46,408 @@ INSTANTIATE_TEST_SUITE_P( "Interface for demangle.test"), std::make_pair("_D8demangle4test12__ModuleInfoZ", "ModuleInfo for demangle.test"), + std::make_pair("_D8demangle4test6__ctorMFZv", "demangle.test.this()"), + std::make_pair("_D8demangle4test6__dtorMFZv", "demangle.test.~this()"), + std::make_pair("_D8demangle4test10__postblitMFZv", + "demangle.test.this(this)"), std::make_pair("_D8demangle4__S14testZ", "demangle.test"), - std::make_pair("_D8demangle4__Sd4testZ", "demangle.__Sd.test"))); + std::make_pair("_D8demangle4__Sd4testZ", "demangle.__Sd.test"), + std::make_pair("_D8demangle1ii", "demangle.i"), + std::make_pair("_D8demangle2siOi", "demangle.si"), + std::make_pair("_D8demangle1re", "demangle.r"), + std::make_pair("_D8demangle1iinvalidtypeseq", nullptr), + std::make_pair("_D8demangle3ABCQeQg1ai", "demangle.ABC.ABC.ABC.a"), + std::make_pair("_D8demangle3ABCQeQaaaa1ai", nullptr), + std::make_pair("_D8demangle4ABCiQfQh1aQh", "demangle.ABCi.ABCi.ABCi.a"), + std::make_pair("_D8demangle4ABCiQfQh1aQaaa", nullptr), + std::make_pair("_D8demangle4testPFLAiYi", "demangle.test"), + std::make_pair("_D8demangle4testFaZv", "demangle.test(char)"), + std::make_pair("_D8demangle4testFbZv", "demangle.test(bool)"), + std::make_pair("_D8demangle4testFcZv", "demangle.test(creal)"), + std::make_pair("_D8demangle4testFdZv", "demangle.test(double)"), + std::make_pair("_D8demangle4testFeZv", "demangle.test(real)"), + std::make_pair("_D8demangle4testFfZv", "demangle.test(float)"), + std::make_pair("_D8demangle4testFgZv", "demangle.test(byte)"), + std::make_pair("_D8demangle4testFhZv", "demangle.test(ubyte)"), + std::make_pair("_D8demangle4testFiZv", "demangle.test(int)"), + std::make_pair("_D8demangle4testFjZv", "demangle.test(ireal)"), + std::make_pair("_D8demangle4testFkZv", "demangle.test(uint)"), + std::make_pair("_D8demangle4testFlZv", "demangle.test(long)"), + std::make_pair("_D8demangle4testFmZv", "demangle.test(ulong)"), + 
std::make_pair("_D8demangle4testFZv", "demangle.test()"), + std::make_pair("_D8demangle4testMFZ2fnMFZv", "demangle.test().fn()"), + std::make_pair("_D8demangle4testFnZv", "demangle.test(typeof(null))"), + std::make_pair("_D8demangle4testFNnZv", "demangle.test(typeof(*null))"), + std::make_pair("_D8demangle4testFoZv", "demangle.test(ifloat)"), + std::make_pair("_D8demangle4testFpZv", "demangle.test(idouble)"), + std::make_pair("_D8demangle4testFqZv", "demangle.test(cfloat)"), + std::make_pair("_D8demangle4testFrZv", "demangle.test(cdouble)"), + std::make_pair("_D8demangle4testFsZv", "demangle.test(short)"), + std::make_pair("_D8demangle4testFtZv", "demangle.test(ushort)"), + std::make_pair("_D8demangle4testFuZv", "demangle.test(wchar)"), + std::make_pair("_D8demangle4testFvZv", "demangle.test(void)"), + std::make_pair("_D8demangle4testFwZv", "demangle.test(dchar)"), + std::make_pair("_D8demangle4testFziZv", "demangle.test(cent)"), + std::make_pair("_D8demangle4testFzkZv", "demangle.test(ucent)"), + std::make_pair("_D8demangle4testFOaZv", "demangle.test(shared(char))"), + std::make_pair("_D8demangle4testFxaZv", "demangle.test(const(char))"), + std::make_pair("_D8demangle4testFyaZv", + "demangle.test(immutable(char))"), + std::make_pair("_D8demangle4testFNgaZv", "demangle.test(inout(char))"), + std::make_pair("_D8demangle4testFOxaZv", + "demangle.test(shared(const(char)))"), + std::make_pair("_D8demangle4testFONgaZv", + "demangle.test(shared(inout(char)))"), + std::make_pair("_D8demangle4testFAaZv", "demangle.test(char[])"), + std::make_pair("_D8demangle4testFAAaZv", "demangle.test(char[][])"), + std::make_pair("_D8demangle4testFAAAaZv", "demangle.test(char[][][])"), + std::make_pair("_D8demangle4testFHaaZv", "demangle.test(char[char])"), + std::make_pair("_D8demangle4testFHHaaaZv", + "demangle.test(char[char[char]])"), + std::make_pair("_D8demangle4testFPaZv", "demangle.test(char*)"), + std::make_pair("_D8demangle4testFPPaZv", "demangle.test(char**)"), + std::make_pair("_D8demangle4testFPPPaZv", "demangle.test(char***)"), + std::make_pair("_D8demangle4testFG42aZv", "demangle.test(char[42])"), + std::make_pair("_D8demangle4testFG42G42aZv", + "demangle.test(char[42][42])"), + std::make_pair("_D8demangle4testFG42G42G42aZv", + "demangle.test(char[42][42][42])"), + std::make_pair("_D8demangle4testFG1234567890aZv", + "demangle.test(char[1234567890])"), + std::make_pair("_D8demangle4testFNhG8gZv", + "demangle.test(__vector(byte[8]))"), + std::make_pair("_D8demangle4testFNhG16gZv", + "demangle.test(__vector(byte[16]))"), + std::make_pair("_D8demangle4testFNhG32gZv", + "demangle.test(__vector(byte[32]))"), + std::make_pair("_D8demangle4testFNhG4sZv", + "demangle.test(__vector(short[4]))"), + std::make_pair("_D8demangle4testFNhG8sZv", + "demangle.test(__vector(short[8]))"), + std::make_pair("_D8demangle4testFNhG16sZv", + "demangle.test(__vector(short[16]))"), + std::make_pair("_D8demangle4testFNhG2iZv", + "demangle.test(__vector(int[2]))"), + std::make_pair("_D8demangle4testFNhG4iZv", + "demangle.test(__vector(int[4]))"), + std::make_pair("_D8demangle4testFNhG8iZv", + "demangle.test(__vector(int[8]))"), + std::make_pair("_D8demangle4testFNhG1lZv", + "demangle.test(__vector(long[1]))"), + std::make_pair("_D8demangle4testFNhG2lZv", + "demangle.test(__vector(long[2]))"), + std::make_pair("_D8demangle4testFNhG4lZv", + "demangle.test(__vector(long[4]))"), + std::make_pair("_D8demangle4testFNhG2fZv", + "demangle.test(__vector(float[2]))"), + std::make_pair("_D8demangle4testFNhG4fZv", + 
"demangle.test(__vector(float[4]))"), + std::make_pair("_D8demangle4testFNhG8fZv", + "demangle.test(__vector(float[8]))"), + std::make_pair("_D8demangle4testFNhG1dZv", + "demangle.test(__vector(double[1]))"), + std::make_pair("_D8demangle4testFNhG2dZv", + "demangle.test(__vector(double[2]))"), + std::make_pair("_D8demangle4testFNhG4dZv", + "demangle.test(__vector(double[4]))"), + std::make_pair("_D8demangle4testFC5classZv", "demangle.test(class)"), + std::make_pair("_D8demangle4testFC5class4testZv", + "demangle.test(class.test)"), + std::make_pair("_D8demangle4testFS6structZv", "demangle.test(struct)"), + std::make_pair("_D8demangle4testFS6struct4testZv", + "demangle.test(struct.test)"), + std::make_pair("_D8demangle4testFE4enumZv", "demangle.test(enum)"), + std::make_pair("_D8demangle4testFE4enum4testZv", + "demangle.test(enum.test)"), + std::make_pair("_D8demangle4testFT7typedefZv", + "demangle.test(typedef)"), + std::make_pair("_D8demangle4testFT7typedef4testZv", + "demangle.test(typedef.test)"), + std::make_pair("_D8demangle4testFIaZv", "demangle.test(in char)"), + std::make_pair("_D8demangle4testFIKaZv", "demangle.test(in ref char)"), + std::make_pair("_D8demangle4testFJaZv", "demangle.test(out char)"), + std::make_pair("_D8demangle4testFKaZv", "demangle.test(ref char)"), + std::make_pair("_D8demangle4testFLaZv", "demangle.test(lazy char)"), + std::make_pair("_D8demangle4testFMaZv", "demangle.test(scope char)"), + std::make_pair("_D8demangle4testFNjaZv", "demangle.test(char)"), + std::make_pair("_D8demangle4testFNkaZv", "demangle.test(return char)"), + std::make_pair("_D8demangle4testFNlaZv", "demangle.test(char)"), + std::make_pair("_D8demangle4testFaXv", "demangle.test(char...)"), + std::make_pair("_D8demangle4testFaYv", "demangle.test(char, ...)"), + std::make_pair("_D8demangle4testFaaYv", + "demangle.test(char, char, ...)"), + std::make_pair("_D8demangle4testFXv", "demangle.test(...)"), + std::make_pair("_D8demangle4testFYv", "demangle.test(, ...)"), + std::make_pair("_D8demangle4testFaaZv", "demangle.test(char, char)"), + std::make_pair("_D8demangle4testFB0Zv", "demangle.test(tuple())"), + std::make_pair("_D8demangle4testFB1aZv", "demangle.test(tuple(char))"), + std::make_pair("_D8demangle4testFB2aaZv", + "demangle.test(tuple(char, char))"), + std::make_pair("_D8demangle4testFB3aaaZv", + "demangle.test(tuple(char, char, char))"), + std::make_pair("_D8demangle4testFB2OaaZv", + "demangle.test(tuple(shared(char), char))"), + std::make_pair("_D8demangle4testFB3aDFZaaZv", + "demangle.test(tuple(char, char() delegate, char))"), + std::make_pair("_D8demangle4testFDFZaZv", + "demangle.test(char() delegate)"), + std::make_pair("_D8demangle4testFDUZaZv", + "demangle.test(extern(C) char() delegate)"), + std::make_pair("_D8demangle4testFDWZaZv", + "demangle.test(extern(Windows) char() delegate)"), + std::make_pair("_D8demangle4testFDVZaZv", + "demangle.test(extern(Pascal) char() delegate)"), + std::make_pair("_D8demangle4testFDRZaZv", + "demangle.test(extern(C++) char() delegate)"), + std::make_pair("_D8demangle4testFDYZaZv", + "demangle.test(extern(Objective-C) char() delegate)"), + std::make_pair("_D8demangle4testFPFZaZv", + "demangle.test(char() function)"), + std::make_pair("_D8demangle4testFPUZaZv", + "demangle.test(extern(C) char() function)"), + std::make_pair("_D8demangle4testFPWZaZv", + "demangle.test(extern(Windows) char() function)"), + std::make_pair("_D8demangle4testFPVZaZv", + "demangle.test(extern(Pascal) char() function)"), + std::make_pair("_D8demangle4testFPRZaZv", + 
"demangle.test(extern(C++) char() function)"), + std::make_pair("_D8demangle4testFPYZaZv", + "demangle.test(extern(Objective-C) char() function)"), + std::make_pair("_D8demangle4testFDFNaZaZv", + "demangle.test(char() pure delegate)"), + std::make_pair("_D8demangle4testFDFNbZaZv", + "demangle.test(char() nothrow delegate)"), + std::make_pair("_D8demangle4testFDFNcZaZv", + "demangle.test(char() ref delegate)"), + std::make_pair("_D8demangle4testFDFNdZaZv", + "demangle.test(char() @property delegate)"), + std::make_pair("_D8demangle4testFDFNeZaZv", + "demangle.test(char() @trusted delegate)"), + std::make_pair("_D8demangle4testFDFNfZaZv", + "demangle.test(char() @safe delegate)"), + std::make_pair("_D8demangle4testFDFNiZaZv", + "demangle.test(char() @nogc delegate)"), + std::make_pair("_D8demangle4testFDFNmZaZv", + "demangle.test(char() @live delegate)"), + std::make_pair("_D8demangle4testFDFNaNbZaZv", + "demangle.test(char() pure nothrow delegate)"), + std::make_pair("_D8demangle4testFDFNbNaZaZv", + "demangle.test(char() nothrow pure delegate)"), + std::make_pair("_D8demangle4testFDFNdNfNaZaZv", + "demangle.test(char() @property @safe pure delegate)"), + std::make_pair("_D8demangle4testFNjDFZaZv", + "demangle.test(char() delegate)"), + std::make_pair("_D8demangle4testFNkDFZaZv", + "demangle.test(return char() delegate)"), + std::make_pair("_D8demangle4testFDFNjZaZv", + "demangle.test(char() return delegate)"), + std::make_pair("_D8demangle4testFNjNkDFNjZaZv", + "demangle.test(return char() return delegate)"), + std::make_pair("_D8demangle4testFNlDFZaZv", + "demangle.test(char() delegate)"), + std::make_pair("_D8demangle4testFMDFZaZv", + "demangle.test(scope char() delegate)"), + std::make_pair("_D8demangle4testFDFNlZaZv", + "demangle.test(char() scope delegate)"), + std::make_pair("_D8demangle4testFMDFNlZaZv", + "demangle.test(scope char() scope delegate)"), + std::make_pair("_D8demangle4testFNlMDFNlZaZv", + "demangle.test(scope char() scope delegate)"), + std::make_pair("_D8demangle4testFPFNaZaZv", + "demangle.test(char() pure function)"), + std::make_pair("_D8demangle4testFPFNbZaZv", + "demangle.test(char() nothrow function)"), + std::make_pair("_D8demangle4testFPFNcZaZv", + "demangle.test(char() ref function)"), + std::make_pair("_D8demangle4testFPFNdZaZv", + "demangle.test(char() @property function)"), + std::make_pair("_D8demangle4testFPFNeZaZv", + "demangle.test(char() @trusted function)"), + std::make_pair("_D8demangle4testFPFNfZaZv", + "demangle.test(char() @safe function)"), + std::make_pair("_D8demangle4testFPFNiZaZv", + "demangle.test(char() @nogc function)"), + std::make_pair("_D8demangle4testFPFNmZaZv", + "demangle.test(char() @live function)"), + std::make_pair("_D8demangle4testFPFNaNbZaZv", + "demangle.test(char() pure nothrow function)"), + std::make_pair("_D8demangle4testFPFNbNaZaZv", + "demangle.test(char() nothrow pure function)"), + std::make_pair("_D8demangle4testFPFNdNfNaZaZv", + "demangle.test(char() @property @safe pure function)"), + std::make_pair("_D8demangle4testFNjPFZaZv", + "demangle.test(char() function)"), + std::make_pair("_D8demangle4testFNkPFZaZv", + "demangle.test(return char() function)"), + std::make_pair("_D8demangle4testFPFNjZaZv", + "demangle.test(char() return function)"), + std::make_pair("_D8demangle4testFNjNkPFNjZaZv", + "demangle.test(return char() return function)"), + std::make_pair("_D8demangle4testFNlPFZaZv", + "demangle.test(char() function)"), + std::make_pair("_D8demangle4testFMPFZaZv", + "demangle.test(scope char() function)"), + 
std::make_pair("_D8demangle4testFPFNlZaZv", + "demangle.test(char() scope function)"), + std::make_pair("_D8demangle4testFMPFNlZaZv", + "demangle.test(scope char() scope function)"), + std::make_pair("_D8demangle4testFNlMPFNlZaZv", + "demangle.test(scope char() scope function)"), + std::make_pair("_D8demangle4test6__ctorMFZv", "demangle.test.this()"), + std::make_pair("_D8demangle4test6__dtorMFZv", "demangle.test.~this()"), + std::make_pair("_D8demangle4test10__postblitMFZv", + "demangle.test.this(this)"), + std::make_pair("_D8demangle4testFHAbaZv", + "demangle.test(char[bool[]])"), + std::make_pair("_D8demangle4testFHG42caZv", + "demangle.test(char[creal[42]])"), + std::make_pair("_D8demangle4testFAiXv", "demangle.test(int[]...)"), + std::make_pair("_D8demangle4testFLAiXv", + "demangle.test(lazy int[]...)"), + std::make_pair("_D8demangle4testFAiYv", "demangle.test(int[], ...)"), + std::make_pair("_D8demangle4testFLAiYv", + "demangle.test(lazy int[], ...)"), + std::make_pair("_D8demangle4testFLilZv", + "demangle.test(lazy int, long)"), + std::make_pair("_D8demangle4testFLliZv", + "demangle.test(lazy long, int)"), + std::make_pair( + "_D8demangle4testFLC6ObjectLDFLiZiZi", + "demangle.test(lazy Object, lazy int(lazy int) delegate)"), + std::make_pair("_D8demangle4testMxFZv", "demangle.test() const"), + std::make_pair("_D8demangle4testMyFZv", "demangle.test() immutable"), + std::make_pair("_D8demangle4testMNgFZv", "demangle.test() inout"), + std::make_pair("_D8demangle4testMNgxFZv", + "demangle.test() inout const"), + std::make_pair("_D8demangle4testMOFZv", "demangle.test() shared"), + std::make_pair("_D8demangle4testMOxFZv", + "demangle.test() shared const"), + std::make_pair("_D8demangle4testMONgFZv", + "demangle.test() shared inout"), + std::make_pair("_D8demangle4testMONgxFZv", + "demangle.test() shared inout const"), + std::make_pair("_D8demangle4testFDxFZaZv", + "demangle.test(char() delegate const)"), + std::make_pair("_D8demangle4testFDyFZaZv", + "demangle.test(char() delegate immutable)"), + std::make_pair("_D8demangle4testFDNgFZaZv", + "demangle.test(char() delegate inout)"), + std::make_pair("_D8demangle4testFDNgxFZaZv", + "demangle.test(char() delegate inout const)"), + std::make_pair("_D8demangle4testFDOFZaZv", + "demangle.test(char() delegate shared)"), + std::make_pair("_D8demangle4testFDOxFZaZv", + "demangle.test(char() delegate shared const)"), + std::make_pair("_D8demangle4testFDONgFZaZv", + "demangle.test(char() delegate shared inout)"), + std::make_pair("_D8demangle4testFDONgxFZaZv", + "demangle.test(char() delegate shared inout const)"), + std::make_pair("_D8demangle004testFaZv", "demangle.test(char)"), + std::make_pair("_D8demangle000000004testFaZv", "demangle.test(char)"), + std::make_pair("_D8demangle9__T4testZv", "demangle.test!()"), + std::make_pair("_D8demangle9__U4testZv", "demangle.test!()"), + std::make_pair("_D8demangle11__T4testTaZv", "demangle.test!(char)"), + std::make_pair("_D8demangle13__T4testTaTaZv", + "demangle.test!(char, char)"), + std::make_pair("_D8demangle15__T4testTaTaTaZv", + "demangle.test!(char, char, char)"), + std::make_pair("_D8demangle16__T4testTaTOiTaZv", + "demangle.test!(char, shared(int), char)"), + std::make_pair("_D8demangle17__T4testS6symbolZv", + "demangle.test!(symbol)"), + std::make_pair("_D8demangle23__T4testS116symbol3fooZv", + "demangle.test!(symbol.foo)"), + std::make_pair("_D8demangle32__T4testS20_D6symbol3foo3barFZvZv", + "demangle.test!(symbol.foo.bar())"), + std::make_pair("_D8demangle19__T4testTaS6symbolZv", + 
"demangle.test!(char, symbol)"), + std::make_pair("_D8demangle19__T4testS6symbolTaZv", + "demangle.test!(symbol, char)"), + std::make_pair("_D8demangle12__T4testHTaZv", "demangle.test!(char)"), + std::make_pair("_D8demangle13__T4testTFZaZ6mangleFZv", + "demangle.test!(char() function).mangle()"), + std::make_pair("_D8demangle15__T4testVgi123Zv", "demangle.test!(123)"), + std::make_pair("_D8demangle15__T4testVii123Zv", "demangle.test!(123)"), + std::make_pair("_D8demangle15__T4testVsi123Zv", "demangle.test!(123)"), + std::make_pair("_D8demangle15__T4testVhi123Zv", "demangle.test!(123u)"), + std::make_pair("_D8demangle15__T4testVki123Zv", "demangle.test!(123u)"), + std::make_pair("_D8demangle15__T4testVti123Zv", "demangle.test!(123u)"), + std::make_pair("_D8demangle15__T4testVli123Zv", "demangle.test!(123L)"), + std::make_pair("_D8demangle15__T4testVmi123Zv", + "demangle.test!(123uL)"), + std::make_pair("_D8demangle15__T4testViN123Zv", "demangle.test!(-123)"), + std::make_pair("_D8demangle15__T4testVkN123Zv", + "demangle.test!(-123u)"), + std::make_pair("_D8demangle15__T4testVlN123Zv", + "demangle.test!(-123L)"), + std::make_pair("_D8demangle15__T4testVmN123Zv", + "demangle.test!(-123uL)"), + std::make_pair("_D8demangle13__T4testVbi1Zv", "demangle.test!(true)"), + std::make_pair("_D8demangle13__T4testVbi0Zv", "demangle.test!(false)"), + std::make_pair("_D8demangle14__T4testVai10Zv", + "demangle.test!('\\x0a')"), + std::make_pair("_D8demangle14__T4testVai32Zv", "demangle.test!(' ')"), + std::make_pair("_D8demangle14__T4testVai65Zv", "demangle.test!('A')"), + std::make_pair("_D8demangle15__T4testVai126Zv", "demangle.test!('~')"), + std::make_pair("_D8demangle16__T4testVui1000Zv", + "demangle.test!('\\u03e8')"), + std::make_pair("_D8demangle18__T4testVwi100000Zv", + "demangle.test!('\\U000186a0')"), + std::make_pair("_D8demangle17__T4testVde0A8P6Zv", + "demangle.test!(0x0.A8p6)"), + std::make_pair("_D8demangle16__T4testVdeA8P2Zv", + "demangle.test!(0xA.8p2)"), + std::make_pair("_D8demangle18__T4testVdeN0A8P6Zv", + "demangle.test!(-0x0.A8p6)"), + std::make_pair("_D8demangle31__T4testVde0F6E978D4FDF3B646P7Zv", + "demangle.test!(0x0.F6E978D4FDF3B646p7)"), + std::make_pair("_D8demangle15__T4testVdeNANZv", "demangle.test!(NaN)"), + std::make_pair("_D8demangle15__T4testVdeINFZv", "demangle.test!(Inf)"), + std::make_pair("_D8demangle16__T4testVdeNINFZv", + "demangle.test!(-Inf)"), + std::make_pair("_D8demangle23__T4testVfe0FFFFFFP128Zv", + "demangle.test!(0x0.FFFFFFp128)"), + std::make_pair("_D8demangle32__T4testVde0FFFFFFFFFFFFF8P1024Zv", + "demangle.test!(0x0.FFFFFFFFFFFFF8p1024)"), + std::make_pair("_D8demangle19__T4testVfe08PN125Zv", + "demangle.test!(0x0.8p-125)"), + std::make_pair("_D8demangle20__T4testVde08PN1021Zv", + "demangle.test!(0x0.8p-1021)"), + std::make_pair( + "_D8demangle51__T4testVrc0C4CCCCCCCCCCCCCDP4c0B666666666666666P6Zv", + "demangle.test!(0x0.C4CCCCCCCCCCCCCDp4+0x0.B666666666666666p6i)"), + std::make_pair( + "_D8demangle52__" + "T4testVrcN0C4CCCCCCCCCCCCCDP4c0B666666666666666P6Zv", + "demangle.test!(-0x0.C4CCCCCCCCCCCCCDp4+0x0.B666666666666666p6i)"), + std::make_pair("_D8demangle91__" + "T4testVde000111222333444555666777888999AAABBBCCCDDDEEEF" + "FFP000111222333444555666777888999Zv", + "demangle.test!(0x0." 
+ "00111222333444555666777888999AAABBBCCCDDDEEEFFFp0001112" + "22333444555666777888999)"), + std::make_pair("_D8demangle22__T4testVG3ua3_616263Zv", + "demangle.test!(\"abc\")"), + std::make_pair("_D8demangle22__T4testVG3ud3_616263Zv", + "demangle.test!(\"abc\"d)"), + std::make_pair("_D8demangle22__T4testVG3uw3_616263Zv", + "demangle.test!(\"abc\"w)"), + std::make_pair("_D8demangle16__T4testVAyaa0_Zv", + "demangle.test!(\"\")"), + std::make_pair("_D8demangle32__T4testVAyaa8_20090a0d0c0b00ffZv", + "demangle.test!(\" \\t\\n\\r\\f\\v\\x00\\xff\")"), + std::make_pair("_D8demangle22__T4testVAiA4i1i2i3i4Zv", + "demangle.test!([1, 2, 3, 4])"), + std::make_pair("_D8demangle25__T4testVAdA2e08P1eN08P1Zv", + "demangle.test!([0x0.8p1, -0x0.8p1])"), + std::make_pair("_D8demangle23__T4testVHiiA2i1i2i3i4Zv", + "demangle.test!([1:2, 3:4])"), + std::make_pair("_D8demangle39__T4testVHAxaiA2a3_616263i1a3_646566i2Zv", + "demangle.test!([\"abc\":1, \"def\":2])"), + std::make_pair("_D8demangle28__T4testVS8demangle1SS2i1i2Zv", + "demangle.test!(demangle.S(1, 2))"), + std::make_pair("_D8demangle35__T4testVS8demangle1SS2i1a3_616263Zv", + "demangle.test!(demangle.S(1, \"abc\"))"), + std::make_pair("_D8demangle13__T4testVPinZv", "demangle.test!(null)"), + std::make_pair("_D8demangle__T3abcS_DQt10__lambda13FNaNbNiNfZiZQBhFZi", + "demangle.abc!(demangle.__lambda13()).abc()"))); diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 6031c808f584..8747e171cfba 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -136,17 +136,17 @@ TEST(VerifierTest, CrossModuleRef) { raw_string_ostream ErrorOS(Error); EXPECT_TRUE(verifyModule(M2, &ErrorOS)); EXPECT_TRUE(StringRef(ErrorOS.str()) - .equals("Global is used by function in a different module\n" - "i32 ()* @foo2\n" - "; ModuleID = 'M2'\n" - "i32 ()* @foo3\n" - "; ModuleID = 'M3'\n" - "Global is referenced in a different module!\n" + .equals("Global is referenced in a different module!\n" "i32 ()* @foo2\n" "; ModuleID = 'M2'\n" " %call = call i32 @foo2()\n" "i32 ()* @foo1\n" - "; ModuleID = 'M1'\n")); + "; ModuleID = 'M1'\n" + "Global is used by function in a different module\n" + "i32 ()* @foo2\n" + "; ModuleID = 'M2'\n" + "i32 ()* @foo3\n" + "; ModuleID = 'M3'\n")); Error.clear(); EXPECT_TRUE(verifyModule(M1, &ErrorOS)); diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index a877fea8223e..f876b6649df5 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -166,6 +166,12 @@ class string: r'^\s*(\.Lfunc_end[0-9]+:\n|end_function)', flags=(re.M | re.S)) +ASM_FUNCTION_VE_RE = re.compile( + r'^_?(?P[^:]+):[ \t]*#+[ \t]*@(?P=func)\n' + r'(?P^##?[ \t]+[^:]+:.*?)\s*' + r'.Lfunc_end[0-9]+:\n', + flags=(re.M | re.S)) + SCRUB_X86_SHUFFLES_RE = ( re.compile( r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$', @@ -354,6 +360,16 @@ def scrub_asm_wasm32(asm, args): asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) return asm +def scrub_asm_ve(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm) + # Expand the tabs used for indentation. + asm = string.expandtabs(asm, 2) + # Strip trailing whitespace. 
+ asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) + return asm + def get_triple_from_march(march): triples = { 'amdgcn': 'amdgcn', @@ -361,6 +377,7 @@ def get_triple_from_march(march): 'mips': 'mips', 'sparc': 'sparc', 'hexagon': 'hexagon', + 've': 've', } for prefix, triple in triples.items(): if march.startswith(prefix): @@ -404,6 +421,7 @@ def get_run_handler(triple): 'sparc': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE), 's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE), 'wasm32': (scrub_asm_wasm32, ASM_FUNCTION_WASM32_RE), + 've': (scrub_asm_ve, ASM_FUNCTION_VE_RE), } handler = None best_prefix = '' diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index 4882e080d21c..b397621d05d8 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -99,10 +99,7 @@ target(asan_target_type, "asan") { "asan_win.cpp", ] if (target_os != "mac" && target_os != "win") { - sources += [ - "asan_interceptors_vfork.S", - "asan_rtl_x86_64.S", - ] + sources += [ "asan_interceptors_vfork.S" ] } # To be able to include sanitizer_common. @@ -163,6 +160,8 @@ target(asan_target_type, "asan") { "-Wl,-U,___sanitizer_symbolize_data", "-Wl,-U,___sanitizer_symbolize_demangle", "-Wl,-U,___sanitizer_symbolize_flush", + "-Wl,-U,___sanitizer_symbolize_set_demangle", + "-Wl,-U,___sanitizer_symbolize_set_inline_frames", # xray "-Wl,-U,___start_xray_fn_idx", @@ -198,5 +197,9 @@ source_set("unused") { "asan_preinit.cpp", "asan_win_dll_thunk.cpp", "asan_win_dynamic_runtime_thunk.cpp", + + # FIXME: These are in clang_rt.asan_static in the CMake build. + + "asan_rtl_x86_64.S", ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn index 68f356318aed..db354d907a4e 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn @@ -157,6 +157,8 @@ target(tsan_target_type, "rtl") { "-Wl,-U,___sanitizer_symbolize_data", "-Wl,-U,___sanitizer_symbolize_demangle", "-Wl,-U,___sanitizer_symbolize_flush", + "-Wl,-U,___sanitizer_symbolize_set_demangle", + "-Wl,-U,___sanitizer_symbolize_set_inline_frames", # FIXME: better "-Wl,-install_name,@rpath/libclang_rt.tsan_osx_dynamic.dylib", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 85a9353958b9..210041831574 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -18,6 +18,7 @@ if (current_toolchain == default_toolchain) { values = [ "_LIBCPP_ABI_FORCE_ITANIUM=", "_LIBCPP_ABI_FORCE_MICROSOFT=", + "_LIBCPP_EXTRA_SITE_DEFINES=", "_LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT=", "_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY=", "_LIBCPP_HAS_NO_INCOMPLETE_FORMAT=", @@ -200,6 +201,22 @@ if (current_toolchain == default_toolchain) { "__coroutine/trivial_awaitables.h", "__debug", "__errc", + "__filesystem/copy_options.h", + "__filesystem/directory_entry.h", + "__filesystem/directory_iterator.h", + "__filesystem/directory_options.h", + "__filesystem/file_status.h", + "__filesystem/file_time_type.h", + "__filesystem/file_type.h", + "__filesystem/filesystem_error.h", + "__filesystem/operations.h", + "__filesystem/path.h", + "__filesystem/path_iterator.h", + "__filesystem/perm_options.h", + "__filesystem/perms.h", + "__filesystem/recursive_directory_iterator.h", + "__filesystem/space_info.h", 
+ "__filesystem/u8path.h", "__format/format_arg.h", "__format/format_args.h", "__format/format_context.h", @@ -403,8 +420,8 @@ if (current_toolchain == default_toolchain) { "__tuple", "__undef_macros", "__utility/as_const.h", + "__utility/auto_cast.h", "__utility/cmp.h", - "__utility/decay_copy.h", "__utility/declval.h", "__utility/exchange.h", "__utility/forward.h", diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 9e7ffb125f7e..ea20edd1036f 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -292,16 +292,6 @@ class FlatAffineConstraints : public IntegerPolyhedron { unsigned symStartPos, ArrayRef localExprs, MLIRContext *context) const; - /// Gather positions of all lower and upper bounds of the identifier at `pos`, - /// and optionally any equalities on it. In addition, the bounds are to be - /// independent of identifiers in position range [`offset`, `offset` + `num`). - void - getLowerAndUpperBoundIndices(unsigned pos, - SmallVectorImpl *lbIndices, - SmallVectorImpl *ubIndices, - SmallVectorImpl *eqIndices = nullptr, - unsigned offset = 0, unsigned num = 0) const; - /// Removes constraints that are independent of (i.e., do not have a /// coefficient) identifiers in the range [pos, pos + num). void removeIndependentConstraints(unsigned pos, unsigned num); @@ -419,6 +409,16 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); + /// Searches for a constraint with a non-zero coefficient at `colIdx` in + /// equality (isEq=true) or inequality (isEq=false) constraints. + /// Returns true and sets row found in search in `rowIdx`, false otherwise. + bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, + unsigned *rowIdx) const; + + /// Returns true if the pos^th column is all zero for both inequalities and + /// equalities. + bool isColZero(unsigned pos) const; + /// A parameter that controls detection of an unrealistic number of /// constraints. If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h index 933f20e44122..c7eae0cd29ee 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h @@ -185,6 +185,16 @@ class IntegerPolyhedron { /// Removes all equalities and inequalities. void clearConstraints(); + /// Gather positions of all lower and upper bounds of the identifier at `pos`, + /// and optionally any equalities on it. In addition, the bounds are to be + /// independent of identifiers in position range [`offset`, `offset` + `num`). + void + getLowerAndUpperBoundIndices(unsigned pos, + SmallVectorImpl *lbIndices, + SmallVectorImpl *ubIndices, + SmallVectorImpl *eqIndices = nullptr, + unsigned offset = 0, unsigned num = 0) const; + protected: /// Return the index at which the specified kind of id starts. 
   unsigned getIdKindOffset(IdKind kind) const;
diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h
new file mode 100644
index 000000000000..0b9c2c707d3e
--- /dev/null
+++ b/mlir/include/mlir/Analysis/Presburger/Utils.h
@@ -0,0 +1,40 @@
+//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions required by the Presburger Library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H
+#define MLIR_ANALYSIS_PRESBURGER_UTILS_H
+
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+
+class IntegerPolyhedron;
+
+namespace presburger_utils {
+
+/// Check if the pos^th identifier can be expressed as a floordiv of an affine
+/// function of other identifiers (where the divisor is a positive constant).
+/// `foundRepr` contains a boolean for each identifier indicating if the
+/// explicit representation for that identifier has already been computed.
+/// Returns the upper and lower bound inequalities using which the floordiv
+/// can be computed. If the representation could be computed, `dividend` and
+/// `denominator` are set. If the representation could not be computed,
+/// `llvm::None` is returned.
+Optional<std::pair<unsigned, unsigned>>
+computeSingleVarRepr(const IntegerPolyhedron &cst, ArrayRef<bool> foundRepr,
+                     unsigned pos, SmallVector<int64_t> &dividend,
+                     unsigned &divisor);
+
+} // namespace presburger_utils
+} // namespace mlir
+
+#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index eaa1c0b42bac..f671c3ca1dae 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1447,6 +1447,10 @@ def LLVM_MemcpyInlineOp : LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1, 2]> {
   let arguments = (ins LLVM_Type:$dst, LLVM_Type:$src, LLVM_Type:$len,
                    LLVM_Type:$isVolatile);
 }
+def LLVM_MemmoveOp : LLVM_ZeroResultIntrOp<"memmove", [0, 1, 2]> {
+  let arguments = (ins LLVM_Type:$dst, LLVM_Type:$src, LLVM_Type:$len,
+                   LLVM_Type:$isVolatile);
+}
 def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2]> {
   let arguments = (ins LLVM_Type:$dst, LLVM_Type:$val, LLVM_Type:$len,
diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h
index e2014954e9fb..659cd7126559 100644
--- a/mlir/include/mlir/IR/MLIRContext.h
+++ b/mlir/include/mlir/IR/MLIRContext.h
@@ -147,6 +147,13 @@ class MLIRContext {
   /// this call in this case.
   void setThreadPool(llvm::ThreadPool &pool);
 
+  /// Return the number of threads used by the thread pool in this context. The
+  /// number of computed hardware threads can change over the lifetime of a
+  /// process based on affinity changes, so users should use the number of
+  /// threads actually in the thread pool for dispatching work. Returns 1 if
+  /// multithreading is disabled.
+  unsigned getNumThreads();
+
   /// Return the thread pool used by this context. This method requires that
   /// multithreading be enabled within the context, and should generally not be
   /// used directly.
Users should instead prefer the threading utilities within diff --git a/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h b/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h index 74ddba34ada9..7627fd0145eb 100644 --- a/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h +++ b/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h @@ -40,9 +40,13 @@ class FrozenRewritePatternSet { /// Freeze the patterns held in `patterns`, and take ownership. /// `disabledPatternLabels` is a set of labels used to filter out input - /// patterns with a label in this set. `enabledPatternLabels` is a set of - /// labels used to filter out input patterns that do not have one of the - /// labels in this set. + /// patterns with a debug label or debug name in this set. + /// `enabledPatternLabels` is a set of labels used to filter out input + /// patterns that do not have one of the labels in this set. Debug labels must + /// be set explicitly on patterns or when adding them with + /// `RewritePatternSet::addWithLabel`. Debug names may be empty, but patterns + /// created with `RewritePattern::create` have their default debug name set to + /// their type name. FrozenRewritePatternSet( RewritePatternSet &&patterns, ArrayRef disabledPatternLabels = llvm::None, diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h index 227d213b0dda..4dc04476b583 100644 --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -205,8 +205,7 @@ template class DebugAction { /// Provide classof to allow casting between handler types. static bool classof(const DebugActionManager::HandlerBase *handler) { - return handler->getHandlerID() == - TypeID::get::Handler>(); + return handler->getHandlerID() == TypeID::get(); } }; diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index e6cfda886363..6aab120fb469 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -62,8 +62,17 @@ std::unique_ptr createBufferResultsToOutParamsPass(); std::unique_ptr createCanonicalizerPass(); /// Creates an instance of the Canonicalizer pass with the specified config. +/// `disabledPatterns` is a set of labels used to filter out input patterns with +/// a debug label or debug name in this set. `enabledPatterns` is a set of +/// labels used to filter out input patterns that do not have one of the labels +/// in this set. Debug labels must be set explicitly on patterns or when adding +/// them with `RewritePatternSet::addWithLabel`. Debug names may be empty, but +/// patterns created with `RewritePattern::create` have their default debug name +/// set to their type name. std::unique_ptr -createCanonicalizerPass(const GreedyRewriteConfig &config); +createCanonicalizerPass(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns = llvm::None, + ArrayRef enabledPatterns = llvm::None); /// Creates a pass to perform common sub expression elimination. 
std::unique_ptr createCSEPass(); diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index f4d857479cde..520262d6fddc 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,6 +13,7 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -700,14 +701,13 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, - unsigned colIdx, bool isEq, - unsigned *rowIdx) { - assert(colIdx < cst.getNumCols() && "position out of bounds"); +bool FlatAffineConstraints::findConstraintWithNonZeroAt( + unsigned colIdx, bool isEq, unsigned *rowIdx) const { + assert(colIdx < getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); + return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? cst.getNumEqualities() : cst.getNumInequalities(); + unsigned e = isEq ? getNumEqualities() : getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,145 +1203,6 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. -/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. 
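As a quick check of the constant-term arithmetic described in the comment above (using only the numbers already given there): for the tighter-bound example with divisor 4, the lower bound 4q - i - j + 2 >= 0 has constant term 2 and the tight upper bound -4q + i + j >= 0 has constant term 0, so constantSum = 2 + 0 = 2 and c = divisor - 1 - constantSum = 4 - 1 - 2 = 1, which indeed lies in [0, divisor - 1]. The dividend's constant term is then the upper bound's constant plus c, i.e. 0 + 1 = 1, recovering expr = i + j + 1 and q = (i + j + 1) floordiv 4, the same division as in the looser-bound example.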
-static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. - // Set `expr` to the dividend of the division except the constant term, which - // is set below. - expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. - expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -static Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, - const SmallVector &foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. - unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. 
- // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. - if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} - void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1378,8 +1239,9 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, - dividends[i], denominators[i])) { + if (auto res = presburger_utils::computeSingleVarRepr( + *this, foundRepr, divOffset + i, dividends[i], + denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1437,11 +1299,9 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1596,60 +1456,6 @@ static bool detectAsMod(const FlatAffineConstraints &cst, unsigned pos, return false; } -/// Gather all lower and upper bounds of the identifier at `pos`, and -/// optionally any equalities on it. In addition, the bounds are to be -/// independent of identifiers in position range [`offset`, `offset` + `num`). -void FlatAffineConstraints::getLowerAndUpperBoundIndices( - unsigned pos, SmallVectorImpl *lbIndices, - SmallVectorImpl *ubIndices, SmallVectorImpl *eqIndices, - unsigned offset, unsigned num) const { - assert(pos < getNumIds() && "invalid position"); - assert(offset + num < getNumCols() && "invalid range"); - - // Checks for a constraint that has a non-zero coeff for the identifiers in - // the position range [offset, offset + num) while ignoring `pos`. - auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) { - unsigned c, f; - auto cst = isEq ? getEquality(r) : getInequality(r); - for (c = offset, f = offset + num; c < f; ++c) { - if (c == pos) - continue; - if (cst[c] != 0) - break; - } - return c < f; - }; - - // Gather all lower bounds and upper bounds of the variable. Since the - // canonical form c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a lower - // bound for x_i if c_i >= 1, and an upper bound if c_i <= -1. - for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { - // The bounds are to be independent of [offset, offset + num) columns. - if (containsConstraintDependentOnRange(r, /*isEq=*/false)) - continue; - if (atIneq(r, pos) >= 1) { - // Lower bound. - lbIndices->push_back(r); - } else if (atIneq(r, pos) <= -1) { - // Upper bound. - ubIndices->push_back(r); - } - } - - // An equality is both a lower and upper bound. Record any equalities - // involving the pos^th identifier. 
- if (!eqIndices) - return; - - for (unsigned r = 0, e = getNumEqualities(); r < e; r++) { - if (atEq(r, pos) == 0) - continue; - if (containsConstraintDependentOnRange(r, /*isEq=*/true)) - continue; - eqIndices->push_back(r); - } -} - /// Check if the pos^th identifier can be expressed as a floordiv of an affine /// function of other identifiers (where the divisor is a positive constant) /// given the initial set of expressions in `exprs`. If it can be, the @@ -1670,7 +1476,8 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); + auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, + dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -2109,7 +1916,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. unsigned idx; - if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { continue; } @@ -3447,12 +3254,10 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -/// Returns true if the pos^th column is all zero for both inequalities and -/// equalities.. -static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { +bool FlatAffineConstraints::isColZero(unsigned pos) const { unsigned rowPos; - return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3471,7 +3276,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(*this, /*pos=*/i)) + if (!memo[i] && !isColZero(/*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/DataFlowAnalysis.cpp b/mlir/lib/Analysis/DataFlowAnalysis.cpp index c6642112e8cb..ca17e953e990 100644 --- a/mlir/lib/Analysis/DataFlowAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlowAnalysis.cpp @@ -756,13 +756,13 @@ void ForwardDataFlowSolver::join(Operation *owner, AbstractLatticeElement &to, // AbstractLatticeElement //===----------------------------------------------------------------------===// -AbstractLatticeElement::~AbstractLatticeElement() {} +AbstractLatticeElement::~AbstractLatticeElement() = default; //===----------------------------------------------------------------------===// // ForwardDataFlowAnalysisBase //===----------------------------------------------------------------------===// -ForwardDataFlowAnalysisBase::~ForwardDataFlowAnalysisBase() {} +ForwardDataFlowAnalysisBase::~ForwardDataFlowAnalysisBase() = default; AbstractLatticeElement & ForwardDataFlowAnalysisBase::getLatticeElement(Value value) { diff --git a/mlir/lib/Analysis/Liveness.cpp b/mlir/lib/Analysis/Liveness.cpp index 39061679e295..64ff321cb1e6 100644 --- a/mlir/lib/Analysis/Liveness.cpp +++ b/mlir/lib/Analysis/Liveness.cpp @@ -27,7 +27,7 @@ struct BlockInfoBuilder { using ValueSetT = 
Liveness::ValueSetT; /// Constructs an empty block builder. - BlockInfoBuilder() : block(nullptr) {} + BlockInfoBuilder() {} /// Fills the block builder with initial liveness information. BlockInfoBuilder(Block *block) : block(block) { @@ -104,7 +104,7 @@ struct BlockInfoBuilder { } /// The current block. - Block *block; + Block *block{nullptr}; /// The set of all live in values. ValueSetT inValues; diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp index 478b212c3b52..e6c9863887b3 100644 --- a/mlir/lib/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Analysis/LoopAnalysis.cpp @@ -182,8 +182,7 @@ static bool isAccessIndexInvariant(Value iv, Value index) { DenseSet mlir::getInvariantAccesses(Value iv, ArrayRef indices) { DenseSet res; - for (unsigned idx = 0, n = indices.size(); idx < n; ++idx) { - auto val = indices[idx]; + for (auto val : indices) { if (isAccessIndexInvariant(iv, val)) { res.insert(val); } diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index dd8c8d96d872..d52d4ccdb1c2 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,10 +2,12 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp + Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen LINK_LIBS PUBLIC MLIRIR + MLIRSupport ) diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 3c4f512bf4fb..958f52e2625e 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -217,3 +217,57 @@ void IntegerPolyhedron::clearConstraints() { equalities.resizeVertically(0); inequalities.resizeVertically(0); } + +/// Gather all lower and upper bounds of the identifier at `pos`, and +/// optionally any equalities on it. In addition, the bounds are to be +/// independent of identifiers in position range [`offset`, `offset` + `num`). +void IntegerPolyhedron::getLowerAndUpperBoundIndices( + unsigned pos, SmallVectorImpl *lbIndices, + SmallVectorImpl *ubIndices, SmallVectorImpl *eqIndices, + unsigned offset, unsigned num) const { + assert(pos < getNumIds() && "invalid position"); + assert(offset + num < getNumCols() && "invalid range"); + + // Checks for a constraint that has a non-zero coeff for the identifiers in + // the position range [offset, offset + num) while ignoring `pos`. + auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) { + unsigned c, f; + auto cst = isEq ? getEquality(r) : getInequality(r); + for (c = offset, f = offset + num; c < f; ++c) { + if (c == pos) + continue; + if (cst[c] != 0) + break; + } + return c < f; + }; + + // Gather all lower bounds and upper bounds of the variable. Since the + // canonical form c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a lower + // bound for x_i if c_i >= 1, and an upper bound if c_i <= -1. + for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { + // The bounds are to be independent of [offset, offset + num) columns. + if (containsConstraintDependentOnRange(r, /*isEq=*/false)) + continue; + if (atIneq(r, pos) >= 1) { + // Lower bound. + lbIndices->push_back(r); + } else if (atIneq(r, pos) <= -1) { + // Upper bound. + ubIndices->push_back(r); + } + } + + // An equality is both a lower and upper bound. Record any equalities + // involving the pos^th identifier. 
+ if (!eqIndices) + return; + + for (unsigned r = 0, e = getNumEqualities(); r < e; r++) { + if (atEq(r, pos) == 0) + continue; + if (containsConstraintDependentOnRange(r, /*isEq=*/true)) + continue; + eqIndices->push_back(r); + } +} diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp new file mode 100644 index 000000000000..8fb9390a440e --- /dev/null +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -0,0 +1,155 @@ +//===- Utils.cpp - General utilities for Presburger library ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/Analysis/Presburger/IntegerPolyhedron.h" +#include "mlir/Support/LogicalResult.h" + +using namespace mlir; + +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. +/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that the +/// constraints are of the form: +/// c <= expr - divisor * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to the dividend of the division and `divisor` is +/// set to the denominator of the division. +static LogicalResult getDivRepr(const IntegerPolyhedron &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid lower bound inequality position"); + + // Extract divisor from the lower bound. + divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term.
+ unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implicitly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. + // Set `expr` to the dividend of the division except the constant term, which + // is set below. + expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. + expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +Optional> presburger_utils::computeSingleVarRepr( + const IntegerPolyhedron &cst, ArrayRef foundRepr, unsigned pos, + SmallVector &dividend, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get division representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. + unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient.
+ if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 097828e077e7..a9697ca83a5a 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -817,15 +817,15 @@ mlir::computeSliceUnion(ArrayRef opsA, ArrayRef opsB, FlatAffineValueConstraints sliceUnionCst; assert(sliceUnionCst.getNumDimAndSymbolIds() == 0); std::vector> dependentOpPairs; - for (unsigned i = 0, numOpsA = opsA.size(); i < numOpsA; ++i) { - MemRefAccess srcAccess(opsA[i]); - for (unsigned j = 0, numOpsB = opsB.size(); j < numOpsB; ++j) { - MemRefAccess dstAccess(opsB[j]); + for (auto i : opsA) { + MemRefAccess srcAccess(i); + for (auto j : opsB) { + MemRefAccess dstAccess(j); if (srcAccess.memref != dstAccess.memref) continue; // Check if 'loopDepth' exceeds nesting depth of src/dst ops. - if ((!isBackwardSlice && loopDepth > getNestingDepth(opsA[i])) || - (isBackwardSlice && loopDepth > getNestingDepth(opsB[j]))) { + if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) || + (isBackwardSlice && loopDepth > getNestingDepth(j))) { LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n"); return SliceComputationResult::GenericFailure; } @@ -844,13 +844,12 @@ mlir::computeSliceUnion(ArrayRef opsA, ArrayRef opsB, } if (result.value == DependenceResult::NoDependence) continue; - dependentOpPairs.push_back({opsA[i], opsB[j]}); + dependentOpPairs.emplace_back(i, j); // Compute slice bounds for 'srcAccess' and 'dstAccess'. ComputationSliceState tmpSliceState; - mlir::getComputationSliceState(opsA[i], opsB[j], &dependenceConstraints, - loopDepth, isBackwardSlice, - &tmpSliceState); + mlir::getComputationSliceState(i, j, &dependenceConstraints, loopDepth, + isBackwardSlice, &tmpSliceState); if (sliceUnionCst.getNumDimAndSymbolIds() == 0) { // Initialize 'sliceUnionCst' with the bounds computed in previous step. 
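For orientation, here is a minimal sketch (not part of the patch) of how the relocated presburger_utils::computeSingleVarRepr entry point is driven, mirroring the getLocalReprs call site shown earlier; the IntegerPolyhedron `cst` is assumed to be built elsewhere and to use the same identifier numbering.

// Sketch only: assumes an existing IntegerPolyhedron `cst`.
#include "mlir/Analysis/Presburger/IntegerPolyhedron.h"
#include "mlir/Analysis/Presburger/Utils.h"
using namespace mlir;

static void detectDivisions(const IntegerPolyhedron &cst) {
  SmallVector<bool, 8> foundRepr(cst.getNumIds(), false);
  for (unsigned pos = 0, e = cst.getNumIds(); pos < e; ++pos) {
    SmallVector<int64_t> dividend;
    unsigned divisor = 0;
    // A non-None result carries the (upper, lower) bound inequality positions
    // from which `pos` was recognized as `dividend floordiv divisor`.
    if (presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, dividend,
                                               divisor))
      foundRepr[pos] = true;
  }
}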
diff --git a/mlir/lib/Bindings/Python/IRAffine.cpp b/mlir/lib/Bindings/Python/IRAffine.cpp index faf01e5c5f11..c7cdc8243479 100644 --- a/mlir/lib/Bindings/Python/IRAffine.cpp +++ b/mlir/lib/Bindings/Python/IRAffine.cpp @@ -676,7 +676,7 @@ void mlir::python::populateIRAffine(py::module &m) { std::vector res; res.reserve(compressed.size()); for (auto m : compressed) - res.push_back(PyAffineMap(context->getRef(), m)); + res.emplace_back(context->getRef(), m); return res; }) .def_property_readonly( diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index f1206617d069..56d16b337c07 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -524,7 +524,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i32 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i32 return bufferInfo(shapedType); } @@ -534,7 +535,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i64 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i64 return bufferInfo(shapedType); } @@ -544,7 +546,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i8 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i8 return bufferInfo(shapedType); } @@ -554,7 +557,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i16 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i16 return bufferInfo(shapedType); } diff --git a/mlir/lib/Bindings/Python/IRInterfaces.cpp b/mlir/lib/Bindings/Python/IRInterfaces.cpp index c3d41c4d84d7..564f36b9dac3 100644 --- a/mlir/lib/Bindings/Python/IRInterfaces.cpp +++ b/mlir/lib/Bindings/Python/IRInterfaces.cpp @@ -175,8 +175,7 @@ class PyInferTypeOpInterface auto *data = static_cast(userData); data->inferredTypes.reserve(data->inferredTypes.size() + nTypes); for (intptr_t i = 0; i < nTypes; ++i) { - data->inferredTypes.push_back( - PyType(data->pyMlirContext.getRef(), types[i])); + data->inferredTypes.emplace_back(data->pyMlirContext.getRef(), types[i]); } } diff --git a/mlir/lib/Bindings/Python/IRModule.cpp b/mlir/lib/Bindings/Python/IRModule.cpp index 7008e54bd046..633ffe4e111b 100644 --- a/mlir/lib/Bindings/Python/IRModule.cpp +++ b/mlir/lib/Bindings/Python/IRModule.cpp @@ -29,7 +29,7 @@ PyGlobals::PyGlobals() { instance = this; // The default search path include {mlir.}dialects, where {mlir.} is the // package prefix configured at compile time. 
- dialectSearchPrefixes.push_back(MAKE_MLIR_PYTHON_QUALNAME("dialects")); + dialectSearchPrefixes.emplace_back(MAKE_MLIR_PYTHON_QUALNAME("dialects")); } PyGlobals::~PyGlobals() { instance = nullptr; } diff --git a/mlir/lib/CAPI/IR/Diagnostics.cpp b/mlir/lib/CAPI/IR/Diagnostics.cpp index 2ed05a5a06e6..40639c7ba31b 100644 --- a/mlir/lib/CAPI/IR/Diagnostics.cpp +++ b/mlir/lib/CAPI/IR/Diagnostics.cpp @@ -57,7 +57,7 @@ MlirDiagnosticHandlerID mlirContextAttachDiagnosticHandler( MlirContext context, MlirDiagnosticHandler handler, void *userData, void (*deleteUserData)(void *)) { assert(handler && "unexpected null diagnostic handler"); - if (deleteUserData == NULL) + if (deleteUserData == nullptr) deleteUserData = deleteUserDataNoop; std::shared_ptr sharedUserData(userData, deleteUserData); DiagnosticEngine::HandlerID id = diff --git a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp index 8d6d8776bde3..07fa5c77c13f 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp @@ -15,7 +15,7 @@ using namespace mlir::pdl_to_pdl_interp; // Positions //===----------------------------------------------------------------------===// -Position::~Position() {} +Position::~Position() = default; /// Returns the depth of the first ancestor operation position. unsigned Position::getOperationDepth() const { diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index b22e50549dfd..517f28c2044f 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -487,12 +487,12 @@ static void buildCostGraph(ArrayRef roots, RootOrderingGraph &graph, if (&p == &q) continue; // Insert or retrieve the property of edge from p to q. - RootOrderingCost &cost = graph[q.root][p.root]; - if (!cost.connector /* new edge */ || cost.cost.first > q.depth) { - if (!cost.connector) - cost.cost.second = nextID++; - cost.cost.first = q.depth; - cost.connector = value; + RootOrderingEntry &entry = graph[q.root][p.root]; + if (!entry.connector /* new edge */ || entry.cost.first > q.depth) { + if (!entry.connector) + entry.cost.second = nextID++; + entry.cost.first = q.depth; + entry.connector = value; } } } @@ -570,10 +570,10 @@ static Value buildPredicateList(pdl::PatternOp pattern, for (auto &target : graph) { llvm::dbgs() << " * " << target.first << "\n"; for (auto &source : target.second) { - RootOrderingCost c = source.second; - llvm::dbgs() << " <- " << source.first << ": " << c.cost.first - << ":" << c.cost.second << " via " << c.connector.getLoc() - << "\n"; + RootOrderingEntry &entry = source.second; + llvm::dbgs() << " <- " << source.first << ": " << entry.cost.first + << ":" << entry.cost.second << " via " + << entry.connector.getLoc() << "\n"; } } }); diff --git a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp index a4d68b1343f7..e1a9fa599cff 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp @@ -46,19 +46,19 @@ static SmallVector getCycle(const DenseMap &parents, /// cycle, u \in C, are replaced with a single edge (v_C, v), and the selected /// node u is marked in the ouptut map actualSource[v]. 
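As an illustration of why the depth adjustment performed inside contract() preserves the overall weight (the numbers here are assumed, not taken from the patch): suppose a two-node cycle u -> w -> u has in-cycle parent depths 2 (for u) and 3 (for w), and there is a single edge into the cycle, s -> w, of depth 7. Contraction records the edge into the representative node with adjusted weight 7 - 3 = 4; solving the contracted graph therefore contributes 4, and adding back the in-cycle depths 2 + 3 afterwards gives 9, which is exactly the weight of the expanded arborescence that keeps s -> w (7) and w -> u (2) while dropping u -> w.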
static void contract(RootOrderingGraph &graph, ArrayRef cycle, - const DenseMap &parentCosts, + const DenseMap &parentDepths, DenseMap &actualSource, DenseMap &actualTarget) { Value rep = cycle.front(); DenseSet cycleSet(cycle.begin(), cycle.end()); // Now, contract the cycle, marking the actual sources and targets. - DenseMap repCosts; + DenseMap repEntries; for (auto outer = graph.begin(), e = graph.end(); outer != e; ++outer) { Value target = outer->first; if (cycleSet.contains(target)) { // Target in the cycle => edges incoming to the cycle or within the cycle. - unsigned parentCost = parentCosts.lookup(target); + unsigned parentDepth = parentDepths.lookup(target); for (const auto &inner : outer->second) { Value source = inner.first; // Ignore edges within the cycle. @@ -67,30 +67,30 @@ static void contract(RootOrderingGraph &graph, ArrayRef cycle, // Edge incoming to the cycle. std::pair cost = inner.second.cost; - assert(parentCost <= cost.first && "invalid parent cost"); + assert(parentDepth <= cost.first && "invalid parent depth"); // Subtract the cost of the parent within the cycle from the cost of // the edge incoming to the cycle. This update ensures that the cost // of the minimum-weight spanning arborescence of the entire graph is // the cost of arborescence for the contracted graph plus the cost of // the cycle, no matter which edge in the cycle we choose to drop. - cost.first -= parentCost; - auto it = repCosts.find(source); - if (it == repCosts.end() || it->second.cost > cost) { + cost.first -= parentDepth; + auto it = repEntries.find(source); + if (it == repEntries.end() || it->second.cost > cost) { actualTarget[source] = target; // Do not bother populating the connector (the connector is only // relevant for the final traversal, not for the optimal branching). - repCosts[source].cost = cost; + repEntries[source].cost = cost; } } // Erase the node in the cycle. graph.erase(outer); } else { // Target not in cycle => edges going away from or unrelated to the cycle. - DenseMap &costs = outer->second; + DenseMap &entries = outer->second; Value bestSource; std::pair bestCost; - auto inner = costs.begin(), innerE = costs.end(); + auto inner = entries.begin(), innerE = entries.end(); while (inner != innerE) { Value source = inner->first; if (cycleSet.contains(source)) { @@ -99,7 +99,7 @@ static void contract(RootOrderingGraph &graph, ArrayRef cycle, bestSource = source; bestCost = inner->second.cost; } - costs.erase(inner++); + entries.erase(inner++); } else { ++inner; } @@ -107,14 +107,14 @@ static void contract(RootOrderingGraph &graph, ArrayRef cycle, // There were going-away edges, contract them. if (bestSource) { - costs[rep].cost = bestCost; + entries[rep].cost = bestCost; actualSource[target] = bestSource; } } } // Store the edges to the representative. - graph[rep] = std::move(repCosts); + graph[rep] = std::move(repEntries); } OptimalBranching::OptimalBranching(RootOrderingGraph graph, Value root) @@ -128,8 +128,8 @@ unsigned OptimalBranching::solve() { // A map that stores the cost of the optimal local choice for each node // in a directed cycle. This map is cleared every time we seed the search. - DenseMap parentCosts; - parentCosts.reserve(graph.size()); + DenseMap parentDepths; + parentDepths.reserve(graph.size()); // Determine if the optimal local choice results in an acyclic graph. 
This is // done by computing the optimal local choice and traversing up the computed @@ -142,27 +142,30 @@ unsigned OptimalBranching::solve() { // Follow the trail of best sources until we reach an already visited node. // The code will assert if we cannot reach an already visited node, i.e., // the graph is not strongly connected. - parentCosts.clear(); + parentDepths.clear(); do { auto it = graph.find(node); assert(it != graph.end() && "the graph is not strongly connected"); + // Find the best local parent, taking into account both the depth and the + // tie breaking rules. Value &bestSource = parents[node]; - unsigned &bestCost = parentCosts[node]; + std::pair bestCost; for (const auto &inner : it->second) { - const RootOrderingCost &cost = inner.second; - if (!bestSource /* initial */ || bestCost > cost.cost.first) { + const RootOrderingEntry &entry = inner.second; + if (!bestSource /* initial */ || bestCost > entry.cost) { bestSource = inner.first; - bestCost = cost.cost.first; + bestCost = entry.cost; } } assert(bestSource && "the graph is not strongly connected"); + parentDepths[node] = bestCost.first; node = bestSource; - totalCost += bestCost; + totalCost += bestCost.first; } while (!parents.count(node)); // If we reached a non-root node, we have a cycle. - if (parentCosts.count(node)) { + if (parentDepths.count(node)) { // Determine the cycle starting at the representative node. SmallVector cycle = getCycle(parents, node); @@ -171,7 +174,7 @@ unsigned OptimalBranching::solve() { DenseMap actualSource, actualTarget; // Contract the cycle and recurse. - contract(graph, cycle, parentCosts, actualSource, actualTarget); + contract(graph, cycle, parentDepths, actualSource, actualTarget); totalCost = solve(); // Redirect the going-away edges. @@ -186,7 +189,7 @@ unsigned OptimalBranching::solve() { Value entry = actualTarget.lookup(parent); cycle.push_back(node); // complete the cycle for (size_t i = 0, e = cycle.size() - 1; i < e; ++i) { - totalCost += parentCosts.lookup(cycle[i]); + totalCost += parentDepths.lookup(cycle[i]); if (cycle[i] == entry) parents[cycle[i]] = parent; // break the cycle else diff --git a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h index 642b6a076f5c..553c5b2de21a 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h +++ b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h @@ -56,12 +56,12 @@ namespace pdl_to_pdl_interp { /// op4 and op5 in the cost graph, because the subtrees rooted at these two /// roots do not intersect. It is easy to see that the optimal root for this /// pattern is op3, resulting in the spanning arborescence op3 -> {op4, op5}. -struct RootOrderingCost { +struct RootOrderingEntry { /// The depth of the connector `Value` w.r.t. the target root. /// - /// This is a pair where the first entry is the actual cost, and the second - /// entry is a priority for breaking ties (with 0 being the highest). - /// Typically, the priority is a unique edge ID. + /// This is a pair where the first value is the additive cost (the depth of + /// the connector), and the second value is a priority for breaking ties + /// (with 0 being the highest). Typically, the priority is a unique edge ID. std::pair cost; /// The connector value in the intersection of the two subtrees rooted at @@ -75,7 +75,7 @@ struct RootOrderingCost { /// is indexed by the target node, and the inner map is indexed by the source /// node. Each edge is associated with a cost and the underlying connector /// value. 
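Since the cost is stored as a std::pair, the tie breaking that both contract() and the solver rely on is plain lexicographic pair comparison; a minimal, self-contained illustration with hypothetical values:

#include <cassert>
#include <utility>

int main() {
  std::pair<unsigned, unsigned> a{2, 7}; // depth 2, tie-breaking edge id 7
  std::pair<unsigned, unsigned> b{2, 3}; // equal depth, smaller (earlier) edge id
  assert(b < a); // depths tie, so the lower id wins; id 0 is the highest priority
  return 0;
}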
-using RootOrderingGraph = DenseMap>; +using RootOrderingGraph = DenseMap>; /// The optimal branching algorithm solver. This solver accepts a graph and the /// root in its constructor, and is invoked via the solve() member function. diff --git a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp index a17eb411acc8..7e2dfaf4db21 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp @@ -99,4 +99,4 @@ ArrayRef AffineValueMap::getOperands() const { AffineMap AffineValueMap::getAffineMap() const { return map.getAffineMap(); } -AffineValueMap::~AffineValueMap() {} +AffineValueMap::~AffineValueMap() = default; diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp index 237094d40006..add9a802ad78 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp @@ -56,7 +56,7 @@ void AffineParallelize::runOnFunction() { f.walk([&](AffineForOp loop) { SmallVector reductions; if (isLoopParallel(loop, parallelReductions ? &reductions : nullptr)) - parallelizableLoops.push_back({loop, std::move(reductions)}); + parallelizableLoops.emplace_back(loop, std::move(reductions)); }); for (const ParallelizationCandidate &candidate : parallelizableLoops) { diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp index f876153834bb..5063d8c66977 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -41,8 +41,8 @@ struct LoopUnroll : public AffineLoopUnrollBase { LoopUnroll() : getUnrollFactor(nullptr) {} LoopUnroll(const LoopUnroll &other) - : AffineLoopUnrollBase(other), - getUnrollFactor(other.getUnrollFactor) {} + + = default; explicit LoopUnroll( Optional unrollFactor = None, bool unrollUpToFactor = false, bool unrollFull = false, diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 0e41d02d3c27..9d59b89ea1a2 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -1494,7 +1494,7 @@ getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel, // Add a new empty level to the output if it doesn't exist already. assert(currentLevel <= loops.size() && "Unexpected currentLevel"); if (currentLevel == loops.size()) - loops.push_back(SmallVector()); + loops.emplace_back(); // Add current match and recursively visit its children. loops[currentLevel].push_back(cast(match.getMatchedOperation())); @@ -1631,7 +1631,7 @@ static void computeIntersectionBuckets( // it. if (!intersects) { bucketRoots.push_back(matchRoot); - intersectionBuckets.push_back(SmallVector()); + intersectionBuckets.emplace_back(); intersectionBuckets.back().push_back(match); } } diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 13bcb9fedb71..480c09aa97a1 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -591,7 +591,7 @@ static bool hasNoInterveningEffect(Operation *start, T memOp) { SmallPtrSet done; // Traverse the CFG until hitting `to`. 
- while (todoBlocks.size()) { + while (!todoBlocks.empty()) { Block *blk = todoBlocks.pop_back_val(); if (done.count(blk)) continue; diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index aca6e4f3c27a..a413fb263775 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -1056,13 +1056,21 @@ static bool applyCmpPredicateToEqualOperands(arith::CmpIPredicate predicate) { llvm_unreachable("unknown cmpi predicate kind"); } +static Attribute getBoolAttribute(Type type, MLIRContext *ctx, bool value) { + auto boolAttr = BoolAttr::get(ctx, value); + ShapedType shapedType = type.dyn_cast_or_null(); + if (!shapedType) + return boolAttr; + return DenseElementsAttr::get(shapedType, boolAttr); +} + OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "cmpi takes two operands"); // cmpi(pred, x, x) if (getLhs() == getRhs()) { auto val = applyCmpPredicateToEqualOperands(getPredicate()); - return BoolAttr::get(getContext(), val); + return getBoolAttribute(getType(), getContext(), val); } auto lhs = operands.front().dyn_cast_or_null(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index 5255bd2d7b00..61a9f1b0cdb3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -33,7 +33,7 @@ namespace { struct LinalgComprehensiveModuleBufferize : public LinalgComprehensiveModuleBufferizeBase< LinalgComprehensiveModuleBufferize> { - LinalgComprehensiveModuleBufferize() {} + LinalgComprehensiveModuleBufferize() = default; LinalgComprehensiveModuleBufferize( const LinalgComprehensiveModuleBufferize &p) {} diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 4858ffd657eb..024d380454d3 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -61,8 +61,8 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { "expected an array for dimension level types"); return {}; } - for (unsigned i = 0, e = arrayAttr.size(); i < e; i++) { - auto strAttr = arrayAttr[i].dyn_cast(); + for (auto i : arrayAttr) { + auto strAttr = i.dyn_cast(); if (!strAttr) { parser.emitError(parser.getNameLoc(), "expected a string value in dimension level types"); diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp index e0ed33bbe692..9a19b63ed198 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp @@ -215,7 +215,7 @@ struct DepthwiseConv2DIsMul : public OpRewritePattern { class TosaOptimization : public PassWrapper { public: - explicit TosaOptimization() {} + explicit TosaOptimization() = default; void runOnFunction() override; StringRef getArgument() const final { return PASS_NAME; } diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 27a0fc6d9dcb..8c724f8ef0be 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -702,7 +702,7 @@ getDimMap(ArrayRef indexingMaps, ArrayAttr iteratorTypes, int64_t lhsDim = getResultIndex(indexingMaps[0], targetExpr); int64_t rhsDim = 
getResultIndex(indexingMaps[1], targetExpr); if (lhsDim >= 0 && rhsDim >= 0) - dimMap.push_back({lhsDim, rhsDim}); + dimMap.emplace_back(lhsDim, rhsDim); } return dimMap; } @@ -1125,11 +1125,11 @@ static Value foldExtractFromBroadcast(ExtractOp extractOp) { auto getRank = [](Type type) { return type.isa() ? type.cast().getRank() : 0; }; - unsigned broadcasrSrcRank = getRank(source.getType()); + unsigned broadcastSrcRank = getRank(source.getType()); unsigned extractResultRank = getRank(extractOp.getType()); - if (extractResultRank < broadcasrSrcRank) { + if (extractResultRank < broadcastSrcRank) { auto extractPos = extractVector(extractOp.position()); - unsigned rankDiff = broadcasrSrcRank - extractResultRank; + unsigned rankDiff = broadcastSrcRank - extractResultRank; extractPos.erase( extractPos.begin(), std::next(extractPos.begin(), extractPos.size() - rankDiff)); @@ -1236,12 +1236,12 @@ class ExtractOpFromBroadcast final : public OpRewritePattern { auto getRank = [](Type type) { return type.isa() ? type.cast().getRank() : 0; }; - unsigned broadcasrSrcRank = getRank(source.getType()); + unsigned broadcastSrcRank = getRank(source.getType()); unsigned extractResultRank = getRank(extractOp.getType()); // We only consider the case where the rank of the source is smaller than // the rank of the extract dst. The other cases are handled in the folding // patterns. - if (extractResultRank <= broadcasrSrcRank) + if (extractResultRank <= broadcastSrcRank) return failure(); rewriter.replaceOpWithNewOp( extractOp, extractOp.getType(), source); diff --git a/mlir/lib/ExecutionEngine/AsyncRuntime.cpp b/mlir/lib/ExecutionEngine/AsyncRuntime.cpp index 1cf593bd4a4b..03243700e9ec 100644 --- a/mlir/lib/ExecutionEngine/AsyncRuntime.cpp +++ b/mlir/lib/ExecutionEngine/AsyncRuntime.cpp @@ -289,7 +289,7 @@ extern "C" int64_t mlirAsyncRuntimeAddTokenToGroup(AsyncToken *token, // then, and re-ackquire the lock. group->addRef(); - token->awaiters.push_back([group, onTokenReady]() { + token->awaiters.emplace_back([group, onTokenReady]() { // Make sure that `dropRef` does not destroy the mutex owned by the lock. 
{ std::unique_lock lockGroup(group->mu); @@ -408,7 +408,7 @@ extern "C" void mlirAsyncRuntimeAwaitTokenAndExecute(AsyncToken *token, lock.unlock(); execute(); } else { - token->awaiters.push_back([execute]() { execute(); }); + token->awaiters.emplace_back([execute]() { execute(); }); } } @@ -421,7 +421,7 @@ extern "C" void mlirAsyncRuntimeAwaitValueAndExecute(AsyncValue *value, lock.unlock(); execute(); } else { - value->awaiters.push_back([execute]() { execute(); }); + value->awaiters.emplace_back([execute]() { execute(); }); } } @@ -434,7 +434,7 @@ extern "C" void mlirAsyncRuntimeAwaitAllInGroupAndExecute(AsyncGroup *group, lock.unlock(); execute(); } else { - group->awaiters.push_back([execute]() { execute(); }); + group->awaiters.emplace_back([execute]() { execute(); }); } } diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp index 889d8c76964d..da284294df37 100644 --- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp @@ -109,7 +109,7 @@ extern "C" void print_flops(double flops) { extern "C" double rtclock() { #ifndef _WIN32 struct timeval tp; - int stat = gettimeofday(&tp, NULL); + int stat = gettimeofday(&tp, nullptr); if (stat != 0) fprintf(stderr, "Error returning time from gettimeofday: %d\n", stat); return (tp.tv_sec + tp.tv_usec * 1.0e-6); diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 99d467e34e2c..5cc40665a8af 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -217,7 +217,7 @@ class SparseTensorStorageBase { /// Finishes insertion. virtual void endInsert() = 0; - virtual ~SparseTensorStorageBase() {} + virtual ~SparseTensorStorageBase() = default; private: void fatal(const char *tp) { @@ -277,7 +277,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { } } - virtual ~SparseTensorStorage() {} + virtual ~SparseTensorStorage() = default; /// Get the rank of the tensor. uint64_t getRank() const { return sizes.size(); } @@ -574,7 +574,7 @@ static void readMMEHeader(FILE *file, char *filename, char *line, exit(1); } // Skip comments. - while (1) { + while (true) { if (!fgets(line, kColWidth, file)) { fprintf(stderr, "Cannot find data in %s\n", filename); exit(1); @@ -598,7 +598,7 @@ static void readMMEHeader(FILE *file, char *filename, char *line, static void readExtFROSTTHeader(FILE *file, char *filename, char *line, uint64_t *idata) { // Skip comments. 
- while (1) { + while (true) { if (!fgets(line, kColWidth, file)) { fprintf(stderr, "Cannot find data in %s\n", filename); exit(1); diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 0b281069823c..376e5c16fe6c 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -54,9 +54,9 @@ void OperationName::dump() const { print(llvm::errs()); } // AsmParser //===--------------------------------------------------------------------===// -AsmParser::~AsmParser() {} -DialectAsmParser::~DialectAsmParser() {} -OpAsmParser::~OpAsmParser() {} +AsmParser::~AsmParser() = default; +DialectAsmParser::~DialectAsmParser() = default; +OpAsmParser::~OpAsmParser() = default; MLIRContext *AsmParser::getContext() const { return getBuilder().getContext(); } @@ -64,13 +64,13 @@ MLIRContext *AsmParser::getContext() const { return getBuilder().getContext(); } // DialectAsmPrinter //===----------------------------------------------------------------------===// -DialectAsmPrinter::~DialectAsmPrinter() {} +DialectAsmPrinter::~DialectAsmPrinter() = default; //===----------------------------------------------------------------------===// // OpAsmPrinter //===----------------------------------------------------------------------===// -OpAsmPrinter::~OpAsmPrinter() {} +OpAsmPrinter::~OpAsmPrinter() = default; void OpAsmPrinter::printFunctionalType(Operation *op) { auto &os = getStream(); @@ -1221,7 +1221,7 @@ class AsmStateImpl { AsmState::AsmState(Operation *op, const OpPrintingFlags &printerFlags, LocationMap *locationMap) : impl(std::make_unique(op, printerFlags, locationMap)) {} -AsmState::~AsmState() {} +AsmState::~AsmState() = default; //===----------------------------------------------------------------------===// // AsmPrinter::Impl @@ -2116,7 +2116,7 @@ void AsmPrinter::Impl::printDialectType(Type type) { // AsmPrinter //===--------------------------------------------------------------------===// -AsmPrinter::~AsmPrinter() {} +AsmPrinter::~AsmPrinter() = default; raw_ostream &AsmPrinter::getStream() const { assert(impl && "expected AsmPrinter::getStream to be overriden"); diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp index 9b91054104dd..3d3e835f4abf 100644 --- a/mlir/lib/IR/Builders.cpp +++ b/mlir/lib/IR/Builders.cpp @@ -342,7 +342,7 @@ AffineMap Builder::getShiftedAffineMap(AffineMap map, int64_t shift) { // OpBuilder //===----------------------------------------------------------------------===// -OpBuilder::Listener::~Listener() {} +OpBuilder::Listener::~Listener() = default; /// Insert the given operation at the current insertion point and return it. Operation *OpBuilder::insert(Operation *op) { diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index fc252585dd21..327264a046c8 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -257,7 +257,7 @@ void DiagnosticEngineImpl::emit(Diagnostic diag) { //===----------------------------------------------------------------------===// DiagnosticEngine::DiagnosticEngine() : impl(new DiagnosticEngineImpl()) {} -DiagnosticEngine::~DiagnosticEngine() {} +DiagnosticEngine::~DiagnosticEngine() = default; /// Register a new handler for diagnostics to the engine. 
This function returns /// a unique identifier for the registered handler, which can be used to @@ -434,7 +434,7 @@ SourceMgrDiagnosticHandler::SourceMgrDiagnosticHandler( : SourceMgrDiagnosticHandler(mgr, ctx, llvm::errs(), std::move(shouldShowLocFn)) {} -SourceMgrDiagnosticHandler::~SourceMgrDiagnosticHandler() {} +SourceMgrDiagnosticHandler::~SourceMgrDiagnosticHandler() = default; void SourceMgrDiagnosticHandler::emitDiagnostic(Location loc, Twine message, DiagnosticSeverity kind, @@ -952,7 +952,7 @@ struct ParallelDiagnosticHandlerImpl : public llvm::PrettyStackTraceEntry { ParallelDiagnosticHandler::ParallelDiagnosticHandler(MLIRContext *ctx) : impl(new ParallelDiagnosticHandlerImpl(ctx)) {} -ParallelDiagnosticHandler::~ParallelDiagnosticHandler() {} +ParallelDiagnosticHandler::~ParallelDiagnosticHandler() = default; /// Set the order id for the current thread. void ParallelDiagnosticHandler::setOrderIDForThread(size_t orderID) { diff --git a/mlir/lib/IR/Dialect.cpp b/mlir/lib/IR/Dialect.cpp index 631dc410632a..6d1d48edbefd 100644 --- a/mlir/lib/IR/Dialect.cpp +++ b/mlir/lib/IR/Dialect.cpp @@ -124,7 +124,7 @@ Dialect::Dialect(StringRef name, MLIRContext *context, TypeID id) assert(isValidNamespace(name) && "invalid dialect namespace"); } -Dialect::~Dialect() {} +Dialect::~Dialect() = default; /// Verify an attribute from this dialect on the argument at 'argIndex' for /// the region at 'regionIndex' on the given operation. Returns failure if @@ -196,7 +196,7 @@ void Dialect::addInterface(std::unique_ptr interface) { // Dialect Interface //===----------------------------------------------------------------------===// -DialectInterface::~DialectInterface() {} +DialectInterface::~DialectInterface() = default; DialectInterfaceCollectionBase::DialectInterfaceCollectionBase( MLIRContext *ctx, TypeID interfaceKind) { @@ -208,7 +208,7 @@ DialectInterfaceCollectionBase::DialectInterfaceCollectionBase( } } -DialectInterfaceCollectionBase::~DialectInterfaceCollectionBase() {} +DialectInterfaceCollectionBase::~DialectInterfaceCollectionBase() = default; /// Get the interface for the dialect of given operation, or null if one /// is not registered. diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 666ce3013f98..7e811316c4e6 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -321,7 +321,7 @@ MLIRContext::MLIRContext(const DialectRegistry ®istry, Threading setting) impl->affineUniquer.registerParametricStorageType(); } -MLIRContext::~MLIRContext() {} +MLIRContext::~MLIRContext() = default; /// Copy the specified array of elements into memory managed by the provided /// bump pointer allocator. This assumes the elements are all PODs. @@ -518,6 +518,16 @@ void MLIRContext::setThreadPool(llvm::ThreadPool &pool) { enableMultithreading(); } +unsigned MLIRContext::getNumThreads() { + if (isMultithreadingEnabled()) { + assert(impl->threadPool && + "multi-threading is enabled but threadpool not set"); + return impl->threadPool->getThreadCount(); + } + // No multithreading or active thread pool. Return 1 thread. 
+ return 1; +} + llvm::ThreadPool &MLIRContext::getThreadPool() { assert(isMultithreadingEnabled() && "expected multi-threading to be enabled within the context"); diff --git a/mlir/lib/Parser/AsmParserState.cpp b/mlir/lib/Parser/AsmParserState.cpp index 95f64910541f..c5c0c9cb9a2c 100644 --- a/mlir/lib/Parser/AsmParserState.cpp +++ b/mlir/lib/Parser/AsmParserState.cpp @@ -91,7 +91,7 @@ void AsmParserState::Impl::resolveSymbolUses() { //===----------------------------------------------------------------------===// AsmParserState::AsmParserState() : impl(std::make_unique()) {} -AsmParserState::~AsmParserState() {} +AsmParserState::~AsmParserState() = default; AsmParserState &AsmParserState::operator=(AsmParserState &&other) { impl = std::move(other.impl); return *this; diff --git a/mlir/lib/Parser/AttributeParser.cpp b/mlir/lib/Parser/AttributeParser.cpp index 13da92c05ff7..881e5b6d0e6d 100644 --- a/mlir/lib/Parser/AttributeParser.cpp +++ b/mlir/lib/Parser/AttributeParser.cpp @@ -670,7 +670,7 @@ DenseElementsAttr TensorLiteralParser::getStringAttr(llvm::SMLoc loc, for (auto val : storage) { stringValues.push_back(val.second.getStringValue()); - stringRefValues.push_back(stringValues.back()); + stringRefValues.emplace_back(stringValues.back()); } return DenseStringElementsAttr::get(type, stringRefValues); diff --git a/mlir/lib/Parser/DialectSymbolParser.cpp b/mlir/lib/Parser/DialectSymbolParser.cpp index 36f583c60211..ded29d9d1f6b 100644 --- a/mlir/lib/Parser/DialectSymbolParser.cpp +++ b/mlir/lib/Parser/DialectSymbolParser.cpp @@ -32,7 +32,7 @@ class CustomDialectAsmParser : public AsmParserImpl { CustomDialectAsmParser(StringRef fullSpec, Parser &parser) : AsmParserImpl(parser.getToken().getLoc(), parser), fullSpec(fullSpec) {} - ~CustomDialectAsmParser() override {} + ~CustomDialectAsmParser() override = default; /// Returns the full specification of the symbol being parsed. This allows /// for using a separate parser if necessary. diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index 87d8f4bcf4de..da765b61243b 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -1233,8 +1233,7 @@ class CustomOpAsmParser : public AsmParserImpl { std::pair getResultName(unsigned resultNo) const override { // Scan for the resultID that contains this result number. - for (unsigned nameID = 0, e = resultIDs.size(); nameID != e; ++nameID) { - const auto &entry = resultIDs[nameID]; + for (const auto &entry : resultIDs) { if (resultNo < std::get<1>(entry)) { // Don't pass on the leading %. 
StringRef name = std::get<0>(entry).drop_front(); diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp index 403d88d2d293..e3d22f2b1cb0 100644 --- a/mlir/lib/Pass/IRPrinting.cpp +++ b/mlir/lib/Pass/IRPrinting.cpp @@ -188,7 +188,7 @@ PassManager::IRPrinterConfig::IRPrinterConfig(bool printModuleScope, printAfterOnlyOnChange(printAfterOnlyOnChange), printAfterOnlyOnFailure(printAfterOnlyOnFailure), opPrintingFlags(opPrintingFlags) {} -PassManager::IRPrinterConfig::~IRPrinterConfig() {} +PassManager::IRPrinterConfig::~IRPrinterConfig() = default; /// A hook that may be overridden by a derived config that checks if the IR /// of 'operation' should be dumped *before* the pass 'pass' has been diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 911753650d5e..fff008330c79 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -182,9 +182,9 @@ void OpPassManagerImpl::coalesceAdjacentAdaptorPasses() { // Walk the pass list and merge adjacent adaptors. OpToOpPassAdaptor *lastAdaptor = nullptr; - for (auto it = passes.begin(), e = passes.end(); it != e; ++it) { + for (auto &passe : passes) { // Check to see if this pass is an adaptor. - if (auto *currentAdaptor = dyn_cast(it->get())) { + if (auto *currentAdaptor = dyn_cast(passe.get())) { // If it is the first adaptor in a possible chain, remember it and // continue. if (!lastAdaptor) { @@ -194,7 +194,7 @@ void OpPassManagerImpl::coalesceAdjacentAdaptorPasses() { // Otherwise, merge into the existing adaptor and delete the current one. currentAdaptor->mergeInto(*lastAdaptor); - it->reset(); + passe.reset(); } else if (lastAdaptor) { // If this pass is not an adaptor, then coalesce and forget any existing // adaptor. @@ -236,7 +236,7 @@ OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { return *this; } -OpPassManager::~OpPassManager() {} +OpPassManager::~OpPassManager() = default; OpPassManager::pass_iterator OpPassManager::begin() { return MutableArrayRef>{impl->passes}.begin(); @@ -634,7 +634,7 @@ PassManager::PassManager(MLIRContext *ctx, Nesting nesting, initializationKey(DenseMapInfo::getTombstoneKey()), passTiming(false), verifyPasses(true) {} -PassManager::~PassManager() {} +PassManager::~PassManager() = default; void PassManager::enableVerifier(bool enabled) { verifyPasses = enabled; } @@ -771,7 +771,7 @@ void detail::NestedAnalysisMap::invalidate( // PassInstrumentation //===----------------------------------------------------------------------===// -PassInstrumentation::~PassInstrumentation() {} +PassInstrumentation::~PassInstrumentation() = default; //===----------------------------------------------------------------------===// // PassInstrumentor @@ -790,7 +790,7 @@ struct PassInstrumentorImpl { } // namespace mlir PassInstrumentor::PassInstrumentor() : impl(new PassInstrumentorImpl()) {} -PassInstrumentor::~PassInstrumentor() {} +PassInstrumentor::~PassInstrumentor() = default; /// See PassInstrumentation::runBeforePipeline for details. 
void PassInstrumentor::runBeforePipeline( diff --git a/mlir/lib/Pass/PassCrashRecovery.cpp b/mlir/lib/Pass/PassCrashRecovery.cpp index 46fec825d3d9..ea642ce44f5d 100644 --- a/mlir/lib/Pass/PassCrashRecovery.cpp +++ b/mlir/lib/Pass/PassCrashRecovery.cpp @@ -196,7 +196,7 @@ struct PassCrashReproducerGenerator::Impl { PassCrashReproducerGenerator::PassCrashReproducerGenerator( PassManager::ReproducerStreamFactory &streamFactory, bool localReproducer) : impl(std::make_unique(streamFactory, localReproducer)) {} -PassCrashReproducerGenerator::~PassCrashReproducerGenerator() {} +PassCrashReproducerGenerator::~PassCrashReproducerGenerator() = default; void PassCrashReproducerGenerator::initialize( iterator_range passes, Operation *op, diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index eae50918dbea..e86a315dc355 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -505,13 +505,13 @@ namespace { /// This struct represents the possible data entries in a parsed pass pipeline /// list. struct PassArgData { - PassArgData() : registryEntry(nullptr) {} + PassArgData() {} PassArgData(const PassRegistryEntry *registryEntry) : registryEntry(registryEntry) {} /// This field is used when the parsed option corresponds to a registered pass /// or pass pipeline. - const PassRegistryEntry *registryEntry; + const PassRegistryEntry *registryEntry{nullptr}; /// This field is set when instance specific pass options have been provided /// on the command line. @@ -694,7 +694,7 @@ struct PassPipelineCLParserImpl { PassPipelineCLParser::PassPipelineCLParser(StringRef arg, StringRef description) : impl(std::make_unique( arg, description, /*passNamesOnly=*/false)) {} -PassPipelineCLParser::~PassPipelineCLParser() {} +PassPipelineCLParser::~PassPipelineCLParser() = default; /// Returns true if this parser contains any valid options to add. bool PassPipelineCLParser::hasAnyOccurrences() const { @@ -737,7 +737,7 @@ PassNameCLParser::PassNameCLParser(StringRef arg, StringRef description) arg, description, /*passNamesOnly=*/true)) { impl->passList.setMiscFlag(llvm::cl::CommaSeparated); } -PassNameCLParser::~PassNameCLParser() {} +PassNameCLParser::~PassNameCLParser() = default; /// Returns true if this parser contains any valid options to add. bool PassNameCLParser::hasAnyOccurrences() const { diff --git a/mlir/lib/Pass/PassStatistics.cpp b/mlir/lib/Pass/PassStatistics.cpp index 8b1b020746c0..aa46adaf76e9 100644 --- a/mlir/lib/Pass/PassStatistics.cpp +++ b/mlir/lib/Pass/PassStatistics.cpp @@ -92,7 +92,7 @@ static void printResultsAsList(raw_ostream &os, OpPassManager &pm) { // Sort the statistics by pass name and then by record name. std::vector>> passAndStatistics; for (auto &passIt : mergedStats) - passAndStatistics.push_back({passIt.first(), std::move(passIt.second)}); + passAndStatistics.emplace_back(passIt.first(), std::move(passIt.second)); llvm::sort(passAndStatistics, [](const auto &lhs, const auto &rhs) { return lhs.first.compare(rhs.first) < 0; }); diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index a9d17431defe..05d4c594100b 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -111,7 +111,7 @@ void ReductionNode::update(std::pair result) { if (interesting == Tester::Interestingness::True) { // This module may has been updated. Reset the range. 
ranges.clear(); - ranges.push_back({0, std::distance(region->op_begin(), region->op_end())}); + ranges.emplace_back(0, std::distance(region->op_begin(), region->op_end())); } else { // Release the uninteresting module to save some memory. module.release()->erase(); diff --git a/mlir/lib/Reducer/Tester.cpp b/mlir/lib/Reducer/Tester.cpp index e5197418f3e5..367ba4f76b05 100644 --- a/mlir/lib/Reducer/Tester.cpp +++ b/mlir/lib/Reducer/Tester.cpp @@ -62,7 +62,7 @@ Tester::Interestingness Tester::isInteresting(StringRef testCase) const { testerArgs.push_back(testCase); for (const std::string &arg : testScriptArgs) - testerArgs.push_back(arg); + testerArgs.emplace_back(arg); testerArgs.push_back(testCase); diff --git a/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp b/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp index 20c71b51c5f8..7988150bcba5 100644 --- a/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp +++ b/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp @@ -133,4 +133,4 @@ FrozenRewritePatternSet::FrozenRewritePatternSet( pdlPatterns.takeRewriteFunctions()); } -FrozenRewritePatternSet::~FrozenRewritePatternSet() {} +FrozenRewritePatternSet::~FrozenRewritePatternSet() = default; diff --git a/mlir/lib/Rewrite/PatternApplicator.cpp b/mlir/lib/Rewrite/PatternApplicator.cpp index ae8beff061c8..d5a98fef09e7 100644 --- a/mlir/lib/Rewrite/PatternApplicator.cpp +++ b/mlir/lib/Rewrite/PatternApplicator.cpp @@ -28,7 +28,7 @@ PatternApplicator::PatternApplicator( bytecode->initializeMutableState(*mutableByteCodeState); } } -PatternApplicator::~PatternApplicator() {} +PatternApplicator::~PatternApplicator() = default; #ifndef NDEBUG /// Log a message for a pattern that is impossible to match. diff --git a/mlir/lib/Support/StorageUniquer.cpp b/mlir/lib/Support/StorageUniquer.cpp index 9291199fdce1..a33eb53d4d48 100644 --- a/mlir/lib/Support/StorageUniquer.cpp +++ b/mlir/lib/Support/StorageUniquer.cpp @@ -332,7 +332,7 @@ struct StorageUniquerImpl { } // namespace mlir StorageUniquer::StorageUniquer() : impl(new StorageUniquerImpl()) {} -StorageUniquer::~StorageUniquer() {} +StorageUniquer::~StorageUniquer() = default; /// Set the flag specifying if multi-threading is disabled within the uniquer. void StorageUniquer::disableMultithreading(bool disable) { diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index 63e07324c0b1..7277e4859582 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -60,7 +60,7 @@ class TimingManagerImpl { TimingManager::TimingManager() : impl(std::make_unique()) {} -TimingManager::~TimingManager() {} +TimingManager::~TimingManager() = default; /// Get the root timer of this timing manager. 
Timer TimingManager::getRootTimer() { diff --git a/mlir/lib/TableGen/Attribute.cpp b/mlir/lib/TableGen/Attribute.cpp index 630a9205fc6c..ac62220d85e4 100644 --- a/mlir/lib/TableGen/Attribute.cpp +++ b/mlir/lib/TableGen/Attribute.cpp @@ -219,7 +219,7 @@ std::vector EnumAttr::getAllCases() const { cases.reserve(inits->size()); for (const llvm::Init *init : *inits) { - cases.push_back(EnumAttrCase(cast(init))); + cases.emplace_back(cast(init)); } return cases; diff --git a/mlir/lib/TableGen/Pass.cpp b/mlir/lib/TableGen/Pass.cpp index f96180689c55..84b3f01d1255 100644 --- a/mlir/lib/TableGen/Pass.cpp +++ b/mlir/lib/TableGen/Pass.cpp @@ -66,9 +66,9 @@ StringRef PassStatistic::getDescription() const { Pass::Pass(const llvm::Record *def) : def(def) { for (auto *init : def->getValueAsListOfDefs("options")) - options.push_back(PassOption(init)); + options.emplace_back(init); for (auto *init : def->getValueAsListOfDefs("statistics")) - statistics.push_back(PassStatistic(init)); + statistics.emplace_back(init); for (StringRef dialect : def->getValueAsListOfStrings("dependentDialects")) dependentDialects.push_back(dialect); } diff --git a/mlir/lib/TableGen/Pattern.cpp b/mlir/lib/TableGen/Pattern.cpp index b459af368fc5..92cd68162093 100644 --- a/mlir/lib/TableGen/Pattern.cpp +++ b/mlir/lib/TableGen/Pattern.cpp @@ -663,7 +663,7 @@ std::vector Pattern::getConstraints() const { &def, "operands to additional constraints can only be symbol references"); } - entities.push_back(std::string(argName->getValue())); + entities.emplace_back(argName->getValue()); } ret.emplace_back(cast(dagInit->getOperator())->getDef(), diff --git a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp index c987198f0c12..4efb5332cced 100644 --- a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp +++ b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp @@ -157,7 +157,7 @@ class TypeFromLLVMIRTranslatorImpl { LLVM::TypeFromLLVMIRTranslator::TypeFromLLVMIRTranslator(MLIRContext &context) : impl(new detail::TypeFromLLVMIRTranslatorImpl(context)) {} -LLVM::TypeFromLLVMIRTranslator::~TypeFromLLVMIRTranslator() {} +LLVM::TypeFromLLVMIRTranslator::~TypeFromLLVMIRTranslator() = default; Type LLVM::TypeFromLLVMIRTranslator::translateType(llvm::Type *type) { return impl->translateType(type); diff --git a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp index 3d67984d7e41..2cd98691e0c3 100644 --- a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp +++ b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp @@ -180,7 +180,7 @@ class TypeToLLVMIRTranslatorImpl { LLVM::TypeToLLVMIRTranslator::TypeToLLVMIRTranslator(llvm::LLVMContext &context) : impl(new detail::TypeToLLVMIRTranslatorImpl(context)) {} -LLVM::TypeToLLVMIRTranslator::~TypeToLLVMIRTranslator() {} +LLVM::TypeToLLVMIRTranslator::~TypeToLLVMIRTranslator() = default; llvm::Type *LLVM::TypeToLLVMIRTranslator::translateType(Type type) { return impl->translateType(type); diff --git a/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp b/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp index 0d043ba6ccbe..93600465abcf 100644 --- a/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp @@ -198,7 +198,7 @@ void LSPServer::Impl::onDocumentSymbol( LSPServer::LSPServer(MLIRServer &server, JSONTransport &transport) : impl(std::make_unique(server, transport)) {} -LSPServer::~LSPServer() {} +LSPServer::~LSPServer() = default; LogicalResult LSPServer::run() { MessageHandler messageHandler(impl->transport); diff --git 
a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp index 88aa4626b42f..046368a3c429 100644 --- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp @@ -878,7 +878,7 @@ struct lsp::MLIRServer::Impl { lsp::MLIRServer::MLIRServer(DialectRegistry ®istry) : impl(std::make_unique(registry)) {} -lsp::MLIRServer::~MLIRServer() {} +lsp::MLIRServer::~MLIRServer() = default; void lsp::MLIRServer::addOrUpdateDocument( const URIForFile &uri, StringRef contents, int64_t version, diff --git a/mlir/lib/Transforms/Canonicalizer.cpp b/mlir/lib/Transforms/Canonicalizer.cpp index 745477d396fb..cb532746c448 100644 --- a/mlir/lib/Transforms/Canonicalizer.cpp +++ b/mlir/lib/Transforms/Canonicalizer.cpp @@ -21,7 +21,13 @@ using namespace mlir; namespace { /// Canonicalize operations in nested regions. struct Canonicalizer : public CanonicalizerBase { - Canonicalizer(const GreedyRewriteConfig &config) : config(config) {} + Canonicalizer(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns, + ArrayRef enabledPatterns) + : config(config) { + this->disabledPatterns = disabledPatterns; + this->enabledPatterns = enabledPatterns; + } Canonicalizer() { // Default constructed Canonicalizer takes its settings from command line @@ -61,6 +67,9 @@ std::unique_ptr mlir::createCanonicalizerPass() { /// Creates an instance of the Canonicalizer pass with the specified config. std::unique_ptr -mlir::createCanonicalizerPass(const GreedyRewriteConfig &config) { - return std::make_unique(config); +mlir::createCanonicalizerPass(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns, + ArrayRef enabledPatterns) { + return std::make_unique(config, disabledPatterns, + enabledPatterns); } diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index 32b12e7a3bf4..f080b4d112ba 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -663,7 +663,7 @@ LogicalResult InlinerPass::optimizeSCC(CallGraph &cg, CGUseList &useList, // Optimize each of the nodes within the SCC in parallel. if (failed(optimizeSCCAsync(nodesToVisit, context))) - return failure(); + return failure(); // Recompute the uses held by each of the nodes. for (CallGraphNode *node : nodesToVisit) @@ -674,11 +674,12 @@ LogicalResult InlinerPass::optimizeSCC(CallGraph &cg, CGUseList &useList, LogicalResult InlinerPass::optimizeSCCAsync(MutableArrayRef nodesToVisit, MLIRContext *ctx) { - // Ensure that there are enough pipeline maps for the optimizer to run in - // parallel. Note: The number of pass managers here needs to remain constant + // We must maintain a fixed pool of pass managers which is at least as large + // as the maximum parallelism of the failableParallelForEach below. + // Note: The number of pass managers here needs to remain constant // to prevent issues with pass instrumentations that rely on having the same // pass manager for the main thread. - size_t numThreads = llvm::hardware_concurrency().compute_thread_count(); + size_t numThreads = ctx->getNumThreads(); if (opPipelines.size() < numThreads) { // Reserve before resizing so that we can use a reference to the first // element. 
@@ -700,6 +701,8 @@ InlinerPass::optimizeSCCAsync(MutableArrayRef nodesToVisit, bool expectedInactive = false; return isActive.compare_exchange_strong(expectedInactive, true); }); + assert(it != activePMs.end() && + "could not find inactive pass manager for thread"); unsigned pmIndex = it - activePMs.begin(); // Optimize this callable node. diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 801268efdddb..e6db0cf6927b 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -198,7 +198,7 @@ struct MemRefDependenceGraph { // The next unique identifier to use for newly created graph nodes. unsigned nextNodeId = 0; - MemRefDependenceGraph() {} + MemRefDependenceGraph() = default; // Initializes the dependence graph based on operations in 'f'. // Returns true on success, false otherwise. diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 97c6a4daaf82..a299b8c5b660 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1492,7 +1492,7 @@ LogicalResult ConversionPatternRewriterImpl::notifyMatchFailure( ConversionPatternRewriter::ConversionPatternRewriter(MLIRContext *ctx) : PatternRewriter(ctx), impl(new detail::ConversionPatternRewriterImpl(*this)) {} -ConversionPatternRewriter::~ConversionPatternRewriter() {} +ConversionPatternRewriter::~ConversionPatternRewriter() = default; void ConversionPatternRewriter::replaceOpWithIf( Operation *op, ValueRange newValues, bool *allUsesReplaced, diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp index e9ea661cb68e..7fd46c711db0 100644 --- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp +++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp @@ -255,11 +255,6 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef regions) { #else LogicalResult matchResult = matcher.matchAndRewrite(op, *this); #endif - - -#ifndef NDEBUG -#endif - changed |= succeeded(matchResult); } diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index c0eb9cdfaf84..fbb79b5af3f8 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -407,7 +407,7 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp, lbShift = d * step; } // Augment the list of operations that get into the current open interval. - opGroupQueue.push_back({d, sortedOpGroups[d]}); + opGroupQueue.emplace_back(d, sortedOpGroups[d]); } // Those operations groups left in the queue now need to be processed (FIFO) @@ -1370,7 +1370,7 @@ struct JamBlockGatherer { while (it != e && !isa(&*it)) ++it; if (it != subBlockStart) - subBlocks.push_back({subBlockStart, std::prev(it)}); + subBlocks.emplace_back(subBlockStart, std::prev(it)); // Process all for ops that appear next. while (it != e && isa(&*it)) walk(&*it++); @@ -1608,8 +1608,7 @@ static bool checkLoopInterchangeDependences( // lexicographically negative. // Example 1: [-1, 1][0, 0] // Example 2: [0, 0][-1, 1] - for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) { - const SmallVector &depComps = depCompsVec[i]; + for (const auto &depComps : depCompsVec) { assert(depComps.size() >= maxLoopDepth); // Check if the first non-zero dependence component is positive. // This iterates through loops in the desired order. 
@@ -1748,8 +1747,7 @@ AffineForOp mlir::sinkSequentialLoops(AffineForOp forOp) { // Mark loops as either parallel or sequential. SmallVector isParallelLoop(maxLoopDepth, true); - for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) { - SmallVector &depComps = depCompsVec[i]; + for (auto &depComps : depCompsVec) { assert(depComps.size() >= maxLoopDepth); for (unsigned j = 0; j < maxLoopDepth; ++j) { DependenceComponent &depComp = depComps[j]; @@ -3181,7 +3179,7 @@ gatherLoopsInBlock(Block *block, unsigned currLoopDepth, // Add a new empty level to output if it doesn't exist level already. assert(currLoopDepth <= depthToLoops.size() && "Unexpected currLoopDepth"); if (currLoopDepth == depthToLoops.size()) - depthToLoops.push_back(SmallVector()); + depthToLoops.emplace_back(); for (auto &op : *block) { if (auto forOp = dyn_cast(op)) { diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index 328ed1d028c7..96a630a248cf 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -22,6 +22,32 @@ func @cmpi_equal_operands(%arg0: i64) : i1, i1, i1, i1, i1, i1, i1, i1, i1, i1 } +// Test case: Folding of comparisons with equal vector operands. +// CHECK-LABEL: @cmpi_equal_vector_operands +// CHECK-DAG: %[[T:.*]] = arith.constant dense +// CHECK-DAG: %[[F:.*]] = arith.constant dense +// CHECK: return %[[T]], %[[T]], %[[T]], %[[T]], %[[T]], +// CHECK-SAME: %[[F]], %[[F]], %[[F]], %[[F]], %[[F]] +func @cmpi_equal_vector_operands(%arg0: vector<1x8xi64>) + -> (vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1>) { + %0 = arith.cmpi eq, %arg0, %arg0 : vector<1x8xi64> + %1 = arith.cmpi sle, %arg0, %arg0 : vector<1x8xi64> + %2 = arith.cmpi sge, %arg0, %arg0 : vector<1x8xi64> + %3 = arith.cmpi ule, %arg0, %arg0 : vector<1x8xi64> + %4 = arith.cmpi uge, %arg0, %arg0 : vector<1x8xi64> + %5 = arith.cmpi ne, %arg0, %arg0 : vector<1x8xi64> + %6 = arith.cmpi slt, %arg0, %arg0 : vector<1x8xi64> + %7 = arith.cmpi sgt, %arg0, %arg0 : vector<1x8xi64> + %8 = arith.cmpi ult, %arg0, %arg0 : vector<1x8xi64> + %9 = arith.cmpi ugt, %arg0, %arg0 : vector<1x8xi64> + return %0, %1, %2, %3, %4, %5, %6, %7, %8, %9 + : vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1> +} + // ----- // CHECK-LABEL: @indexCastOfSignExtend diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index ebb59aee530e..e1eff69a4251 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -350,6 +350,14 @@ llvm.func @memcpy_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { llvm.return } +// CHECK-LABEL: @memmove_test +llvm.func @memmove_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { + %i1 = llvm.mlir.constant(false) : i1 + // CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 {{.*}}) + "llvm.intr.memmove"(%arg2, %arg3, %arg0, %i1) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + llvm.return +} + // CHECK-LABEL: @memset_test llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) { %i1 = llvm.mlir.constant(false) : i1 diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir index 98692853bf24..00a0445f5a88 100644 --- 
a/mlir/test/Transforms/inlining.mlir +++ b/mlir/test/Transforms/inlining.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -inline='default-pipeline=''' | FileCheck %s +// RUN: mlir-opt %s --mlir-disable-threading -inline='default-pipeline=''' | FileCheck %s // RUN: mlir-opt %s -inline='default-pipeline=''' -mlir-print-debuginfo -mlir-print-local-scope | FileCheck %s --check-prefix INLINE-LOC // RUN: mlir-opt %s -inline | FileCheck %s --check-prefix INLINE_SIMPLIFY diff --git a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp index 081bc89e8747..01ab39503e9f 100644 --- a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp +++ b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp @@ -108,11 +108,11 @@ struct TestAliasAnalysisModRefPass }); // Check for aliasing behavior between each of the values. - for (auto it = valsToCheck.begin(), e = valsToCheck.end(); it != e; ++it) { + for (auto &it : valsToCheck) { getOperation()->walk([&](Operation *op) { if (!op->getAttr("test.ptr")) return; - printModRefResult(aliasAnalysis.getModRef(op, *it), op, *it); + printModRefResult(aliasAnalysis.getModRef(op, it), op, it); }); } } diff --git a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp index d3e6788711d8..01fba3acfa7d 100644 --- a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp +++ b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp @@ -48,18 +48,18 @@ getDirectionVectorStr(bool ret, unsigned numCommonLoops, unsigned loopNestDepth, if (dependenceComponents.empty() || loopNestDepth > numCommonLoops) return "true"; std::string result; - for (unsigned i = 0, e = dependenceComponents.size(); i < e; ++i) { + for (const auto &dependenceComponent : dependenceComponents) { std::string lbStr = "-inf"; - if (dependenceComponents[i].lb.hasValue() && - dependenceComponents[i].lb.getValue() != + if (dependenceComponent.lb.hasValue() && + dependenceComponent.lb.getValue() != std::numeric_limits::min()) - lbStr = std::to_string(dependenceComponents[i].lb.getValue()); + lbStr = std::to_string(dependenceComponent.lb.getValue()); std::string ubStr = "+inf"; - if (dependenceComponents[i].ub.hasValue() && - dependenceComponents[i].ub.getValue() != + if (dependenceComponent.ub.hasValue() && + dependenceComponent.ub.getValue() != std::numeric_limits::max()) - ubStr = std::to_string(dependenceComponents[i].ub.getValue()); + ubStr = std::to_string(dependenceComponent.ub.getValue()); result += "[" + lbStr + ", " + ubStr + "]"; } diff --git a/mlir/test/lib/IR/TestDiagnostics.cpp b/mlir/test/lib/IR/TestDiagnostics.cpp index 8bff82a22e62..e6bb5f90f57e 100644 --- a/mlir/test/lib/IR/TestDiagnostics.cpp +++ b/mlir/test/lib/IR/TestDiagnostics.cpp @@ -23,7 +23,7 @@ struct TestDiagnosticFilterPass StringRef getDescription() const final { return "Test diagnostic filtering support."; } - TestDiagnosticFilterPass() {} + TestDiagnosticFilterPass() = default; TestDiagnosticFilterPass(const TestDiagnosticFilterPass &) {} void runOnOperation() override { diff --git a/mlir/test/lib/IR/TestOpaqueLoc.cpp b/mlir/test/lib/IR/TestOpaqueLoc.cpp index 92a5c5a41b2c..0b24126d062b 100644 --- a/mlir/test/lib/IR/TestOpaqueLoc.cpp +++ b/mlir/test/lib/IR/TestOpaqueLoc.cpp @@ -26,11 +26,11 @@ struct TestOpaqueLoc /// A simple structure which is used for testing as an underlying location in /// OpaqueLoc. 
struct MyLocation { - MyLocation() : id(42) {} + MyLocation() = default; MyLocation(int id) : id(id) {} int getId() { return id; } - int id; + int id{42}; }; void runOnOperation() override { diff --git a/mlir/test/lib/Pass/TestDynamicPipeline.cpp b/mlir/test/lib/Pass/TestDynamicPipeline.cpp index d10dd4a200fd..90b88202f7e5 100644 --- a/mlir/test/lib/Pass/TestDynamicPipeline.cpp +++ b/mlir/test/lib/Pass/TestDynamicPipeline.cpp @@ -32,7 +32,7 @@ class TestDynamicPipelinePass pm.getDependentDialects(registry); } - TestDynamicPipelinePass(){}; + TestDynamicPipelinePass() = default; TestDynamicPipelinePass(const TestDynamicPipelinePass &) {} void runOnOperation() override { diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Transforms/TestLoopMapping.cpp index ebd739885236..721e697dd902 100644 --- a/mlir/test/lib/Transforms/TestLoopMapping.cpp +++ b/mlir/test/lib/Transforms/TestLoopMapping.cpp @@ -32,7 +32,7 @@ class TestLoopMappingPass StringRef getDescription() const final { return "test mapping a single loop on a virtual processor grid"; } - explicit TestLoopMappingPass() {} + explicit TestLoopMappingPass() = default; void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); diff --git a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp index 1a92f4a162ee..e098e89da39d 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp @@ -205,7 +205,7 @@ class LLVMEnumAttr : public tblgen::EnumAttr { std::vector cases; for (auto &c : tblgen::EnumAttr::getAllCases()) - cases.push_back(LLVMEnumAttrCase(c)); + cases.emplace_back(c); return cases; } diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 2b5767e32567..adbd8407af99 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -1291,7 +1291,7 @@ void OperationFormat::genElementParser(Element *element, MethodBody &body, llvm::raw_string_ostream os(attrTypeStr); os << tgfmt(*typeBuilder, &attrTypeCtx); } else { - attrTypeStr = "Type{}"; + attrTypeStr = "::mlir::Type{}"; } if (var->attr.isOptional()) { body << formatv(optionalAttrParserCode, var->name, attrTypeStr); @@ -1375,7 +1375,7 @@ void OperationFormat::genElementParser(Element *element, MethodBody &body, listName); }) .Default([&](auto operand) { - body << formatv(typeParserCode, "Type", listName); + body << formatv(typeParserCode, "::mlir::Type", listName); }); } } else if (auto *dir = dyn_cast(element)) { @@ -1517,7 +1517,7 @@ void OperationFormat::genParserOperandTypeResolution( // once. Use llvm::concat to perform the merge. llvm::concat does not allow // the case of a single range, so guard it here. if (op.getNumOperands() > 1) { - body << "::llvm::concat("; + body << "::llvm::concat("; llvm::interleaveComma( llvm::seq(0, op.getNumOperands()), body, [&](int i) { body << "::llvm::ArrayRef<::mlir::Type>("; diff --git a/mlir/tools/mlir-tblgen/PassGen.cpp b/mlir/tools/mlir-tblgen/PassGen.cpp index 1edd4db551cd..7d98a600519f 100644 --- a/mlir/tools/mlir-tblgen/PassGen.cpp +++ b/mlir/tools/mlir-tblgen/PassGen.cpp @@ -94,7 +94,7 @@ static void emitPassOptionDecls(const Pass &pass, raw_ostream &os) { os.indent(2) << "::mlir::Pass::" << (opt.isListOption() ? 
"ListOption" : "Option"); - os << llvm::formatv("<{0}> {1}{{*this, \"{2}\", ::llvm::cl::desc(\"{3}\")", + os << llvm::formatv(R"(<{0}> {1}{{*this, "{2}", ::llvm::cl::desc("{3}"))", opt.getType(), opt.getCppVariableName(), opt.getArgument(), opt.getDescription()); if (Optional defaultVal = opt.getDefaultValue()) @@ -198,7 +198,7 @@ static void emitDecls(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) { os << "/* Autogenerated by mlir-tblgen; don't manually edit */\n"; std::vector passes; for (const auto *def : recordKeeper.getAllDerivedDefinitions("PassBase")) - passes.push_back(Pass(def)); + passes.emplace_back(def); emitPassDecls(passes, os); emitRegistration(passes, os); diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index 2a290bb86b27..ad0b05e4c47b 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -1103,7 +1103,7 @@ emitExtendedSetDeserializationDispatch(const RecordKeeper &recordKeeper, Operator op(def); auto setName = def->getValueAsString("extendedInstSetName"); if (!extensionSets.count(setName)) { - extensionSetNames.push_back(""); + extensionSetNames.emplace_back(""); extensionSets.try_emplace(setName, extensionSetNames.back()); auto &setos = extensionSets.find(setName)->second; setos << formatv(" if ({0} == \"{1}\") {{\n", extensionSetName, setName); diff --git a/mlir/unittests/Analysis/CMakeLists.txt b/mlir/unittests/Analysis/CMakeLists.txt index 3346426fa036..b6340ec72812 100644 --- a/mlir/unittests/Analysis/CMakeLists.txt +++ b/mlir/unittests/Analysis/CMakeLists.txt @@ -7,7 +7,7 @@ add_mlir_unittest(MLIRAnalysisTests ) target_link_libraries(MLIRAnalysisTests - PRIVATE + PRIVATE MLIRLoopAnalysis MLIRParser ) diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 21506862a302..2798c443cf98 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(ExecutionEngine) add_subdirectory(Interfaces) add_subdirectory(IR) add_subdirectory(Pass) +add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Transforms) diff --git a/mlir/unittests/Pass/PassManagerTest.cpp b/mlir/unittests/Pass/PassManagerTest.cpp index bd6ef0462a71..ff1cda2116ba 100644 --- a/mlir/unittests/Pass/PassManagerTest.cpp +++ b/mlir/unittests/Pass/PassManagerTest.cpp @@ -12,6 +12,8 @@ #include "mlir/Pass/Pass.h" #include "gtest/gtest.h" +#include + using namespace mlir; using namespace mlir::detail; @@ -105,7 +107,7 @@ TEST(PassManagerTest, InvalidPass) { // check it later. std::unique_ptr diagnostic; context.getDiagEngine().registerHandler([&](Diagnostic &diag) { - diagnostic.reset(new Diagnostic(std::move(diag))); + diagnostic = std::make_unique(std::move(diag)); }); // Instantiate and run our pass. 
diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt index 6616a793ec12..fd1e66205c07 100644 --- a/mlir/unittests/Support/CMakeLists.txt +++ b/mlir/unittests/Support/CMakeLists.txt @@ -7,4 +7,4 @@ add_mlir_unittest(MLIRSupportTests ) target_link_libraries(MLIRSupportTests - PRIVATE MLIRSupportIndentedOstream MLIRSupport) + PRIVATE MLIRSupport) diff --git a/mlir/unittests/Transforms/CMakeLists.txt b/mlir/unittests/Transforms/CMakeLists.txt index 9636f93835eb..b78f3cd8cf22 100644 --- a/mlir/unittests/Transforms/CMakeLists.txt +++ b/mlir/unittests/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_unittest(MLIRTransformsTests + Canonicalizer.cpp DialectConversion.cpp ) target_link_libraries(MLIRTransformsTests diff --git a/mlir/unittests/Transforms/Canonicalizer.cpp b/mlir/unittests/Transforms/Canonicalizer.cpp new file mode 100644 index 000000000000..de968326f63e --- /dev/null +++ b/mlir/unittests/Transforms/Canonicalizer.cpp @@ -0,0 +1,84 @@ +//===- DialectConversion.cpp - Dialect conversion unit tests --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/PatternMatch.h" +#include "mlir/Parser.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" +#include "gtest/gtest.h" + +using namespace mlir; + +namespace { + +struct DisabledPattern : public RewritePattern { + DisabledPattern(MLIRContext *context) + : RewritePattern("test.foo", /*benefit=*/0, context, + /*generatedNamed=*/{}) { + setDebugName("DisabledPattern"); + } + + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + if (op->getNumResults() != 1) + return failure(); + rewriter.eraseOp(op); + return success(); + } +}; + +struct EnabledPattern : public RewritePattern { + EnabledPattern(MLIRContext *context) + : RewritePattern("test.foo", /*benefit=*/0, context, + /*generatedNamed=*/{}) { + setDebugName("EnabledPattern"); + } + + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + if (op->getNumResults() == 1) + return failure(); + rewriter.eraseOp(op); + return success(); + } +}; + +struct TestDialect : public Dialect { + static StringRef getDialectNamespace() { return "test"; } + + TestDialect(MLIRContext *context) + : Dialect(getDialectNamespace(), context, TypeID::get()) { + allowUnknownOperations(); + } + + void getCanonicalizationPatterns(RewritePatternSet &results) const override { + results.insert(results.getContext()); + } +}; + +TEST(CanonicalizerTest, TestDisablePatterns) { + MLIRContext context; + context.getOrLoadDialect(); + PassManager mgr(&context); + mgr.addPass( + createCanonicalizerPass(GreedyRewriteConfig(), {"DisabledPattern"})); + + const char *const code = R"mlir( + %0:2 = "test.foo"() {sym_name = "A"} : () -> (i32, i32) + %1 = "test.foo"() {sym_name = "B"} : () -> (f32) + )mlir"; + + OwningModuleRef module = mlir::parseSourceString(code, &context); + ASSERT_TRUE(succeeded(mgr.run(*module))); + + EXPECT_TRUE(module->lookupSymbol("B")); + EXPECT_FALSE(module->lookupSymbol("A")); +} + +} // end anonymous namespace diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index 
5ed0c64ad3f8..b35da47f59cb 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -154,6 +154,39 @@ struct RecursiveScheduleTreeVisitor } }; +/// Recursively visit all nodes of a schedule tree while allowing changes. +/// +/// The visit methods return an isl::schedule_node that is used to continue +/// visiting the tree. Structural changes such as returning a different node +/// will confuse the visitor. +template +struct ScheduleNodeRewriter + : public RecursiveScheduleTreeVisitor { + Derived &getDerived() { return *static_cast(this); } + const Derived &getDerived() const { + return *static_cast(this); + } + + isl::schedule_node visitNode(isl::schedule_node Node, Args... args) { + return getDerived().visitChildren(Node); + } + + isl::schedule_node visitChildren(isl::schedule_node Node, Args... args) { + if (!Node.has_children()) + return Node; + + isl::schedule_node It = Node.first_child(); + while (true) { + It = getDerived().visit(It, std::forward(args)...); + if (!It.has_next_sibling()) + break; + It = It.next_sibling(); + } + return It.parent(); + } +}; + /// Is this node the marker for its parent band? bool isBandMark(const isl::schedule_node &Node); diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index ab0bcdaf8ef5..6497275df610 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -671,7 +671,7 @@ IslAstInfo IslAstAnalysis::run(Scop &S, ScopAnalysisManager &SAM, return SAM.getResult(S, SAR).getDependences(Lvl); }; - return std::move(*runIslAst(S, GetDeps).release()); + return std::move(*runIslAst(S, GetDeps)); } static __isl_give isl_printer *cbPrintUser(__isl_take isl_printer *P, diff --git a/polly/lib/External/isl/isl_int_sioimath.h b/polly/lib/External/isl/isl_int_sioimath.h index a2112cd8e2fb..dc691b8a0b1e 100644 --- a/polly/lib/External/isl/isl_int_sioimath.h +++ b/polly/lib/External/isl/isl_int_sioimath.h @@ -868,7 +868,6 @@ inline void isl_sioimath_tdiv_q(isl_sioimath_ptr dst, isl_sioimath_src lhs, isl_sioimath_bigarg_src(rhs, &rhsscratch), isl_sioimath_reinit_big(dst), NULL); isl_sioimath_try_demote(dst); - return; } /* Divide lhs by an unsigned long rhs, rounding to zero (Truncate). diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 002674375df0..8f175596d711 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -472,7 +472,7 @@ class SCEVInRegionDependences { // are strictly not necessary by tracking the invariant load as a // scalar. 
LoadInst *LI = dyn_cast(Inst); - if (LI && ILS.count(LI) > 0) + if (LI && ILS.contains(LI)) return false; } diff --git a/polly/lib/Transform/ManualOptimizer.cpp b/polly/lib/Transform/ManualOptimizer.cpp index 2c05927582e2..ec4c584b27e2 100644 --- a/polly/lib/Transform/ManualOptimizer.cpp +++ b/polly/lib/Transform/ManualOptimizer.cpp @@ -271,7 +271,6 @@ class SearchTransformVisitor } // not a loop transformation; look for next property - continue; } } diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 03878d5c8e4b..0a6461139542 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -384,6 +384,19 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node, return Result; } +struct InsertSimdMarkers : public ScheduleNodeRewriter { + isl::schedule_node visitBand(isl::schedule_node_band Band) { + isl::schedule_node Node = visitChildren(Band); + + // Only add SIMD markers to innermost bands. + if (!Node.first_child().isa()) + return Node; + + isl::id LoopMarker = isl::id::alloc(Band.ctx(), "SIMD", nullptr); + return Band.insert_mark(LoopMarker); + } +}; + isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) { assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); @@ -408,16 +421,19 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( Node = Node.child(0); // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, // we will have troubles to match it in the backend. - isl::schedule_node_band NodeBand = - Node.as().set_ast_build_options( - isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }")); - Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release())); - Node = Node.child(0); - if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf) - Node = Node.parent(); - auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr); + Node = Node.as().set_ast_build_options( + isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }")); + + // Sink the inner loop into the smallest possible statements to make them + // represent a single vector instruction if possible. + Node = isl::manage(isl_schedule_node_band_sink(Node.release())); + + // Add SIMD markers to those vector statements. + InsertSimdMarkers SimdMarkerInserter; + Node = SimdMarkerInserter.visit(Node); + PrevectOpts++; - return Node.insert_mark(LoopMarker); + return Node.parent(); } static bool isSimpleInnermostBand(const isl::schedule_node &Node) { diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp index a2cb538021fb..01f18eadb4d9 100644 --- a/polly/lib/Transform/ScheduleTreeTransform.cpp +++ b/polly/lib/Transform/ScheduleTreeTransform.cpp @@ -118,35 +118,6 @@ static isl::schedule rebuildBand(isl::schedule_node_band OldBand, return NewBand.get_schedule(); } -/// Recursively visit all nodes of a schedule tree while allowing changes. -/// -/// The visit methods return an isl::schedule_node that is used to continue -/// visiting the tree. Structural changes such as returning a different node -/// will confuse the visitor. -template -struct ScheduleNodeRewriter - : public RecursiveScheduleTreeVisitor { - Derived &getDerived() { return *static_cast(this); } - const Derived &getDerived() const { - return *static_cast(this); - } - - isl::schedule_node visitNode(const isl::schedule_node &Node, Args... 
args) { - if (!Node.has_children()) - return Node; - - isl::schedule_node It = Node.first_child(); - while (true) { - It = getDerived().visit(It, std::forward(args)...); - if (!It.has_next_sibling()) - break; - It = It.next_sibling(); - } - return It.parent(); - } -}; - /// Rewrite a schedule tree by reconstructing it bottom-up. /// /// By default, the original schedule tree is reconstructed. To build a diff --git a/polly/lib/Transform/ScopInliner.cpp b/polly/lib/Transform/ScopInliner.cpp index ed54731c6b2a..b35d3518e72d 100644 --- a/polly/lib/Transform/ScopInliner.cpp +++ b/polly/lib/Transform/ScopInliner.cpp @@ -84,7 +84,7 @@ class ScopInliner : public CallGraphSCCPass { ScopDetection &SD = FAM.getResult(*F); const bool HasScopAsTopLevelRegion = - SD.ValidRegions.count(RI.getTopLevelRegion()) > 0; + SD.ValidRegions.contains(RI.getTopLevelRegion()); bool Changed = false; if (HasScopAsTopLevelRegion) { diff --git a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll index 0c0bc12eb4cd..9d0c6b5b9479 100644 --- a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll +++ b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll @@ -80,15 +80,17 @@ cleanup: ; preds = %for.cond, %entry ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i0) mod 32)]; Stmt_for_body23[i0, i1] -> [((i0) mod 32)] }]" ; CHECK: permutable: 1 ; CHECK: child: -; CHECK: mark: "SIMD" -; CHECK: child: -; CHECK: sequence: -; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }" +; CHECK: sequence: +; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }" +; CHECK: child: +; CHECK: mark: "SIMD" ; CHECK: child: ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]" ; CHECK: permutable: 1 ; CHECK: coincident: [ 1 ] -; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }" +; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }" +; CHECK: child: +; CHECK: mark: "SIMD" ; CHECK: child: ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]" ; CHECK: permutable: 1 diff --git a/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll new file mode 100644 index 000000000000..3bd1f9838500 --- /dev/null +++ b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-vectorizer=stripmine -polly-isl-arg=--no-schedule-serialize-sccs -polly-tiling=0 -polly-opt-isl -analyze - < %s | FileCheck %s + +; isl_schedule_node_band_sink may sink into multiple children. 
+; https://llvm.org/PR52637 + +%struct.v4l2_sliced_vbi_data = type { [48 x i8] } + +define void @vivid_vbi_gen_sliced() { +entry: + br label %for.body + +for.body: ; preds = %vivid_vbi_gen_teletext.exit, %entry + %i.015 = phi i32 [ 0, %entry ], [ %inc, %vivid_vbi_gen_teletext.exit ] + %data0.014 = phi %struct.v4l2_sliced_vbi_data* [ null, %entry ], [ %incdec.ptr, %vivid_vbi_gen_teletext.exit ] + %arraydecay = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 0 + %arrayidx.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 6 + %0 = load i8, i8* %arrayidx.i, align 1 + store i8 %0, i8* %arraydecay, align 1 + br label %for.body.for.body_crit_edge.i + +for.body.for.body_crit_edge.i: ; preds = %for.body.for.body_crit_edge.i, %for.body + %inc10.i13 = phi i32 [ 1, %for.body ], [ %inc10.i, %for.body.for.body_crit_edge.i ] + %arrayidx2.phi.trans.insert.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 %inc10.i13 + store i8 0, i8* %arrayidx2.phi.trans.insert.i, align 1 + %inc10.i = add nuw nsw i32 %inc10.i13, 1 + %exitcond.not.i = icmp eq i32 %inc10.i13, 42 + br i1 %exitcond.not.i, label %vivid_vbi_gen_teletext.exit, label %for.body.for.body_crit_edge.i + +vivid_vbi_gen_teletext.exit: ; preds = %for.body.for.body_crit_edge.i + %incdec.ptr = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 1 + %inc = add nuw nsw i32 %i.015, 1 + %exitcond.not = icmp eq i32 %i.015, 1 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %vivid_vbi_gen_teletext.exit + ret void +} + + +; CHECK: schedule: +; CHECK: schedule: +; CHECK: mark: "SIMD" +; CHECK: schedule: +; CHECK: mark: "SIMD" +; CHECK: schedule: diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 7c31359fb4ed..85d79a29b571 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4207,6 +4207,45 @@ cc_binary( ], ) +gentbl( + name = "TLICheckerOptsTableGen", + strip_include_prefix = "tools/llvm-tli-checker", + tbl_outs = [( + "-gen-opt-parser-defs", + "tools/llvm-tli-checker/Opts.inc", + )], + tblgen = ":llvm-tblgen", + td_file = "tools/llvm-tli-checker/Opts.td", + td_srcs = ["include/llvm/Option/OptParser.td"], +) + +cc_binary( + name = "llvm-tli-checker", + testonly = True, + srcs = glob([ + "tools/llvm-tli-checker/*.cpp", + "tools/llvm-tli-checker/*.h", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":BinaryFormat", + ":BitReader", + ":BitstreamReader", + ":Core", + ":Demangle", + ":MC", + ":MCParser", + ":Object", + ":Option", + ":Remarks", + ":Support", + ":TextAPI", + ":TLICheckerOptsTableGen", + ], +) + cc_binary( name = "obj2yaml", testonly = True, diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel index 095841a60e94..73560d698081 100644 --- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel @@ -75,6 +75,20 @@ cc_test( ], ) +cc_test( + name = "rewrite_tests", + size = "small", + srcs = glob([ + "Rewrite/*.cpp", + "Rewrite/*.h", + ]), + deps = [ + "//llvm:gtest_main", + "//mlir:IR", + "//mlir:Rewrite", + ], +) + cc_test( name = "dialect_tests", size = "small", @@ -104,6 +118,21 
@@ cc_test(
     ],
 )
 
+cc_test(
+    name = "scf_tests",
+    size = "small",
+    srcs = glob([
+        "Dialect/SCF/*.cpp",
+        "Dialect/SCF/*.h",
+    ]),
+    deps = [
+        "//llvm:gtest_main",
+        "//mlir:Parser",
+        "//mlir:SCFDialect",
+        "//mlir:StandardOps",
+    ],
+)
+
 cc_test(
     name = "sparse_tensor_tests",
     size = "small",
@@ -228,6 +257,23 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "transforms_test",
+    size = "small",
+    srcs = glob([
+        "Transforms/*.cpp",
+        "Transforms/*.h",
+    ]),
+    deps = [
+        "//llvm:gtest_main",
+        "//mlir:IR",
+        "//mlir:Parser",
+        "//mlir:Pass",
+        "//mlir:TransformUtils",
+        "//mlir:Transforms",
+    ],
+)
+
 cc_test(
     name = "analysis_tests",
     size = "small",
@@ -245,6 +291,22 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "conversion_tests",
+    size = "small",
+    srcs = glob([
+        "Conversion/*.cpp",
+        "Conversion/*.h",
+        "Conversion/*/*.cpp",
+        "Conversion/*/*.h",
+    ]),
+    deps = [
+        "//llvm:gtest_main",
+        "//mlir:ArithmeticDialect",
+        "//mlir:PDLToPDLInterp",
+    ],
+)
+
 cc_test(
     name = "execution_engine_tests",
     size = "small",